From 1aec85974ab79903aaaab7d1f7fffe3d1ad1eee2 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 27 Oct 2021 08:31:22 -0700 Subject: [PATCH 01/14] net/mlx5: Add esw assignment back in mlx5e_tc_sample_unoffload() Clang warns: drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c:635:34: error: variable 'esw' is uninitialized when used here [-Werror,-Wuninitialized] mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); ^~~ drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c:626:26: note: initialize the variable 'esw' to silence this warning struct mlx5_eswitch *esw; ^ = NULL 1 error generated. It appears that the assignment should have been shuffled instead of removed outright like in mlx5e_tc_sample_offload(). Add it back so there is no use of esw uninitialized. Fixes: a64c5edbd20e ("net/mlx5: Remove unnecessary checks for slow path flag") Link: https://github.com/ClangBuiltLinux/linux/issues/1494 Signed-off-by: Nathan Chancellor Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index 1046b7ea5c88..df6888c4793c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -631,6 +631,7 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, /* The following delete order can't be changed, otherwise, * will hit fw syndromes. 
*/ + esw = tc_psample->esw; sample_flow = attr->sample_attr->sample_flow; mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); From ae2ee3be99a87fdbc01bd82e77eb26dcb69d874a Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 31 Aug 2021 11:57:52 +0300 Subject: [PATCH 02/14] net/mlx5: CT: Remove warning of ignore_flow_level support for VFs ignore_flow_level isn't supported for VFs, and so it causes post_act and ct to warn about it. Instead of disabling CT for VFs, which would require a driver update to enable CT again once firmware supports this, remove this warning specifically for VFs. This way, it could be automatically enabled on future firmwares where VFs support the ignore_flow_level capability. Signed-off-by: Paul Blakey Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/tc/post_act.c | 13 ++++--- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 34 ++++++++++++------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index a3e43e898a56..31b4e39be2d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -4,6 +4,7 @@ #include "en_tc.h" #include "post_act.h" #include "mlx5_core.h" +#include "fs_core.h" struct mlx5e_post_act { enum mlx5_flow_namespace_type ns_type; @@ -28,16 +29,14 @@ struct mlx5e_post_act * mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, enum mlx5_flow_namespace_type ns_type) { + enum fs_flow_table_type table_type = ns_type == MLX5_FLOW_NAMESPACE_FDB ? 
+ FS_FT_FDB : FS_FT_NIC_RX; struct mlx5e_post_act *post_act; int err; - if (ns_type == MLX5_FLOW_NAMESPACE_FDB && - !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ignore_flow_level)) { - mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); - err = -EOPNOTSUPP; - goto err_check; - } else if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { - mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); + if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) { + if (priv->mdev->coredev_type != MLX5_COREDEV_VF) + mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); err = -EOPNOTSUPP; goto err_check; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 740cd6f088b8..f44e5de25037 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2039,25 +2039,36 @@ mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, static int mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, enum mlx5_flow_namespace_type ns_type, - struct mlx5e_post_act *post_act, - const char **err_msg) + struct mlx5e_post_act *post_act) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + const char *err_msg = NULL; + int err = 0; #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) /* cannot restore chain ID on HW miss */ - *err_msg = "tc skb extension missing"; - return -EOPNOTSUPP; + err_msg = "tc skb extension missing"; + err = -EOPNOTSUPP; + goto out_err; #endif if (IS_ERR_OR_NULL(post_act)) { - *err_msg = "tc ct offload not supported, post action is missing"; - return -EOPNOTSUPP; + /* Ignore_flow_level support isn't supported by default for VFs and so post_act + * won't be supported. Skip showing error msg. 
+ */ + if (priv->mdev->coredev_type != MLX5_COREDEV_VF) + err_msg = "post action is missing"; + err = -EOPNOTSUPP; + goto out_err; } if (ns_type == MLX5_FLOW_NAMESPACE_FDB) - return mlx5_tc_ct_init_check_esw_support(esw, err_msg); - return 0; + err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg); + +out_err: + if (err && err_msg) + netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg); + return err; } #define INIT_ERR_PREFIX "tc ct offload init failed" @@ -2070,16 +2081,13 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, { struct mlx5_tc_ct_priv *ct_priv; struct mlx5_core_dev *dev; - const char *msg; u64 mapping_id; int err; dev = priv->mdev; - err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act, &msg); - if (err) { - mlx5_core_warn(dev, "tc ct offload not supported, %s\n", msg); + err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act); + if (err) goto err_support; - } ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); if (!ct_priv) From 428ffea0711a11efa0c1c4ee1fac27903ed091be Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Tue, 26 Oct 2021 10:10:42 +0300 Subject: [PATCH 03/14] net/mlx5e: IPsec: Refactor checksum code in tx data path Part of code that is related solely to IPsec is always compiled in the driver code regardless if the IPsec functionality is enabled or disabled in the driver code, this will add unnecessary branch in case IPsec is disabled at Tx data path. 
Move the IPsec related code to the IPsec related file such that, in case IPsec is disabled, and because of the unlikely macro, the compiler should be able to optimize and omit the IPsec checksum code altogether from the Tx data path. Signed-off-by: Raed Salem Reviewed-by: Emeel Hakim Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 26 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_tx.c | 20 ++------------ 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index 5120a59361e6..b98db50c3418 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -127,6 +127,25 @@ mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } +static inline bool +mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + + if (!mlx5e_ipsec_eseg_meta(eseg)) + return false; + + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; + if (xo->inner_ipproto) { + eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L3_INNER_CSUM; + } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + sq->stats->csum_partial_inner++; + } + + return true; +} #else static inline void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, @@ -143,6 +162,13 @@ static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; static inline netdev_features_t mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features) { return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } + +static inline bool +mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) 
+{ + return false; +} #endif /* CONFIG_MLX5_EN_IPSEC */ #endif /* __MLX5E_IPSEC_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 188994d091c5..7fd33b356cc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -38,6 +38,7 @@ #include "en/txrx.h" #include "ipoib/ipoib.h" #include "en_accel/en_accel.h" +#include "en_accel/ipsec_rxtx.h" #include "en/ptp.h" static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) @@ -213,30 +214,13 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); } -static void -ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5_wqe_eth_seg *eseg) -{ - struct xfrm_offload *xo = xfrm_offload(skb); - - eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; - if (xo->inner_ipproto) { - eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L3_INNER_CSUM; - } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { - eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; - sq->stats->csum_partial_inner++; - } -} - static inline void mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, struct mlx5_wqe_eth_seg *eseg) { - if (unlikely(mlx5e_ipsec_eseg_meta(eseg))) { - ipsec_txwqe_build_eseg_csum(sq, skb, eseg); + if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg))) return; - } if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; From 504e15724893a839213fad5eedfbd511d9ba75cc Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 11 Jul 2021 16:56:54 +0300 Subject: [PATCH 04/14] net/mlx5: Allow skipping counter refresh on creation CT creates a counter for each CT rule, and for each such counter, fs_counters tries to queue mlx5_fc_stats_work() work again via mod_delayed_work(0) call to refresh 
all counters. This call has a large performance impact when reaching a high insertion rate and accounts for ~8% of the insertion time when using software steering. Allow skipping the refresh of all counters during counter creation. Change CT to use this refresh skipping for its counters. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 2 +- .../net/ethernet/mellanox/mlx5/core/fs_counters.c | 14 +++++++++++--- include/linux/mlx5/fs.h | 4 ++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index f44e5de25037..c1c6e74c79c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -889,7 +889,7 @@ mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) return ERR_PTR(-ENOMEM); counter->is_shared = false; - counter->counter = mlx5_fc_create(ct_priv->dev, true); + counter->counter = mlx5_fc_create_ex(ct_priv->dev, true); if (IS_ERR(counter->counter)) { ct_dbg("Failed to create counter for ct entry"); ret = PTR_ERR(counter->counter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 60c9df1bc912..31c99d53faf7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -301,7 +301,7 @@ static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging) return mlx5_fc_single_alloc(dev); } -struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging) { struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging); struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; @@ -332,8 +332,6 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool 
aging) goto err_out_alloc; llist_add(&counter->addlist, &fc_stats->addlist); - - mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); } return counter; @@ -342,6 +340,16 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) mlx5_fc_release(dev, counter); return ERR_PTR(err); } + +struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc *counter = mlx5_fc_create_ex(dev, aging); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (aging) + mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); + return counter; +} EXPORT_SYMBOL(mlx5_fc_create); u32 mlx5_fc_id(struct mlx5_fc *counter) diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index a7e1155bc4da..cd2d4c572367 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -245,6 +245,10 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_flow_destination *old_dest); struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); + +/* As mlx5_fc_create() but doesn't queue stats refresh thread. */ +struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging); + void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, From 941f19798a11c46c6700cf0ff6f1d810a491b63b Mon Sep 17 00:00:00 2001 From: Muhammad Sammar Date: Mon, 5 Jul 2021 15:39:38 +0300 Subject: [PATCH 05/14] net/mlx5: DR, Add check for unsupported fields in match param When a matcher is being built, we "consume" (clear) mask fields one by one, and to verify that we do support all the required fields we check if the whole mask was consumed, else the matching request includes unsupported fields. 
Signed-off-by: Muhammad Sammar Signed-off-by: Saeed Mahameed Reviewed-by: Yevgeny Kliteynik --- .../mellanox/mlx5/core/steering/dr_matcher.c | 28 +- .../mellanox/mlx5/core/steering/dr_rule.c | 2 +- .../mellanox/mlx5/core/steering/dr_ste.c | 260 ++++++++++-------- .../mellanox/mlx5/core/steering/dr_types.h | 3 +- 4 files changed, 166 insertions(+), 127 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index b5409cc021d3..75c775bee351 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -875,9 +875,10 @@ static int dr_matcher_init_fdb(struct mlx5dr_matcher *matcher) static int dr_matcher_init(struct mlx5dr_matcher *matcher, struct mlx5dr_match_parameters *mask) { + struct mlx5dr_match_parameters consumed_mask; struct mlx5dr_table *tbl = matcher->tbl; struct mlx5dr_domain *dmn = tbl->dmn; - int ret; + int i, ret; if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) { mlx5dr_err(dmn, "Invalid match criteria attribute\n"); @@ -889,8 +890,16 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher, mlx5dr_err(dmn, "Invalid match size attribute\n"); return -EINVAL; } + + consumed_mask.match_buf = kzalloc(mask->match_sz, GFP_KERNEL); + if (!consumed_mask.match_buf) + return -ENOMEM; + + consumed_mask.match_sz = mask->match_sz; + memcpy(consumed_mask.match_buf, mask->match_buf, mask->match_sz); mlx5dr_ste_copy_param(matcher->match_criteria, - &matcher->mask, mask); + &matcher->mask, &consumed_mask, + true); } switch (dmn->type) { @@ -909,9 +918,22 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher, break; default: WARN_ON(true); - return -EINVAL; + ret = -EINVAL; + goto free_consumed_mask; } + /* Check that all mask data was consumed */ + for (i = 0; i < consumed_mask.match_sz; i++) { + if (consumed_mask.match_buf[i]) { + mlx5dr_dbg(dmn, "Match param mask contains 
unsupported parameters\n"); + ret = -EOPNOTSUPP; + goto free_consumed_mask; + } + } + + ret = 0; +free_consumed_mask: + kfree(consumed_mask.match_buf); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 323ea138ad99..6a390e981b09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -917,7 +917,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, return false; } - mlx5dr_ste_copy_param(matcher->match_criteria, param, value); + mlx5dr_ste_copy_param(matcher->match_criteria, param, value, false); if (match_criteria & DR_MATCHER_CRITERIA_OUTER) { s_idx = offsetof(struct mlx5dr_match_param, outer); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 1cdfe4fccc7a..219a5474a8a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -668,101 +668,116 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, return 0; } -static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec) +#define IFC_GET_CLR(typ, p, fld, clear) ({ \ + void *__p = (p); \ + u32 __t = MLX5_GET(typ, __p, fld); \ + if (clear) \ + MLX5_SET(typ, __p, fld, 0); \ + __t; \ +}) + +#define memcpy_and_clear(to, from, len, clear) ({ \ + void *__to = (to), *__from = (from); \ + size_t __len = (len); \ + memcpy(__to, __from, __len); \ + if (clear) \ + memset(__from, 0, __len); \ +}) + +static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec, bool clr) { - spec->gre_c_present = MLX5_GET(fte_match_set_misc, mask, gre_c_present); - spec->gre_k_present = MLX5_GET(fte_match_set_misc, mask, gre_k_present); - spec->gre_s_present = MLX5_GET(fte_match_set_misc, mask, gre_s_present); - spec->source_vhca_port = 
MLX5_GET(fte_match_set_misc, mask, source_vhca_port); - spec->source_sqn = MLX5_GET(fte_match_set_misc, mask, source_sqn); + spec->gre_c_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_c_present, clr); + spec->gre_k_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_k_present, clr); + spec->gre_s_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_s_present, clr); + spec->source_vhca_port = IFC_GET_CLR(fte_match_set_misc, mask, source_vhca_port, clr); + spec->source_sqn = IFC_GET_CLR(fte_match_set_misc, mask, source_sqn, clr); - spec->source_port = MLX5_GET(fte_match_set_misc, mask, source_port); - spec->source_eswitch_owner_vhca_id = MLX5_GET(fte_match_set_misc, mask, - source_eswitch_owner_vhca_id); + spec->source_port = IFC_GET_CLR(fte_match_set_misc, mask, source_port, clr); + spec->source_eswitch_owner_vhca_id = + IFC_GET_CLR(fte_match_set_misc, mask, source_eswitch_owner_vhca_id, clr); - spec->outer_second_prio = MLX5_GET(fte_match_set_misc, mask, outer_second_prio); - spec->outer_second_cfi = MLX5_GET(fte_match_set_misc, mask, outer_second_cfi); - spec->outer_second_vid = MLX5_GET(fte_match_set_misc, mask, outer_second_vid); - spec->inner_second_prio = MLX5_GET(fte_match_set_misc, mask, inner_second_prio); - spec->inner_second_cfi = MLX5_GET(fte_match_set_misc, mask, inner_second_cfi); - spec->inner_second_vid = MLX5_GET(fte_match_set_misc, mask, inner_second_vid); + spec->outer_second_prio = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_prio, clr); + spec->outer_second_cfi = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_cfi, clr); + spec->outer_second_vid = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_vid, clr); + spec->inner_second_prio = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_prio, clr); + spec->inner_second_cfi = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_cfi, clr); + spec->inner_second_vid = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_vid, clr); spec->outer_second_cvlan_tag = - MLX5_GET(fte_match_set_misc, 
mask, outer_second_cvlan_tag); + IFC_GET_CLR(fte_match_set_misc, mask, outer_second_cvlan_tag, clr); spec->inner_second_cvlan_tag = - MLX5_GET(fte_match_set_misc, mask, inner_second_cvlan_tag); + IFC_GET_CLR(fte_match_set_misc, mask, inner_second_cvlan_tag, clr); spec->outer_second_svlan_tag = - MLX5_GET(fte_match_set_misc, mask, outer_second_svlan_tag); + IFC_GET_CLR(fte_match_set_misc, mask, outer_second_svlan_tag, clr); spec->inner_second_svlan_tag = - MLX5_GET(fte_match_set_misc, mask, inner_second_svlan_tag); + IFC_GET_CLR(fte_match_set_misc, mask, inner_second_svlan_tag, clr); + spec->gre_protocol = IFC_GET_CLR(fte_match_set_misc, mask, gre_protocol, clr); - spec->gre_protocol = MLX5_GET(fte_match_set_misc, mask, gre_protocol); + spec->gre_key_h = IFC_GET_CLR(fte_match_set_misc, mask, gre_key.nvgre.hi, clr); + spec->gre_key_l = IFC_GET_CLR(fte_match_set_misc, mask, gre_key.nvgre.lo, clr); - spec->gre_key_h = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi); - spec->gre_key_l = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo); + spec->vxlan_vni = IFC_GET_CLR(fte_match_set_misc, mask, vxlan_vni, clr); - spec->vxlan_vni = MLX5_GET(fte_match_set_misc, mask, vxlan_vni); - - spec->geneve_vni = MLX5_GET(fte_match_set_misc, mask, geneve_vni); - spec->geneve_oam = MLX5_GET(fte_match_set_misc, mask, geneve_oam); + spec->geneve_vni = IFC_GET_CLR(fte_match_set_misc, mask, geneve_vni, clr); + spec->geneve_oam = IFC_GET_CLR(fte_match_set_misc, mask, geneve_oam, clr); spec->outer_ipv6_flow_label = - MLX5_GET(fte_match_set_misc, mask, outer_ipv6_flow_label); + IFC_GET_CLR(fte_match_set_misc, mask, outer_ipv6_flow_label, clr); spec->inner_ipv6_flow_label = - MLX5_GET(fte_match_set_misc, mask, inner_ipv6_flow_label); + IFC_GET_CLR(fte_match_set_misc, mask, inner_ipv6_flow_label, clr); - spec->geneve_opt_len = MLX5_GET(fte_match_set_misc, mask, geneve_opt_len); + spec->geneve_opt_len = IFC_GET_CLR(fte_match_set_misc, mask, geneve_opt_len, clr); 
spec->geneve_protocol_type = - MLX5_GET(fte_match_set_misc, mask, geneve_protocol_type); + IFC_GET_CLR(fte_match_set_misc, mask, geneve_protocol_type, clr); - spec->bth_dst_qp = MLX5_GET(fte_match_set_misc, mask, bth_dst_qp); + spec->bth_dst_qp = IFC_GET_CLR(fte_match_set_misc, mask, bth_dst_qp, clr); } -static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec) +static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec, bool clr) { __be32 raw_ip[4]; - spec->smac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16); + spec->smac_47_16 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, smac_47_16, clr); - spec->smac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0); - spec->ethertype = MLX5_GET(fte_match_set_lyr_2_4, mask, ethertype); + spec->smac_15_0 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, smac_15_0, clr); + spec->ethertype = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ethertype, clr); - spec->dmac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16); + spec->dmac_47_16 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, dmac_47_16, clr); - spec->dmac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0); - spec->first_prio = MLX5_GET(fte_match_set_lyr_2_4, mask, first_prio); - spec->first_cfi = MLX5_GET(fte_match_set_lyr_2_4, mask, first_cfi); - spec->first_vid = MLX5_GET(fte_match_set_lyr_2_4, mask, first_vid); + spec->dmac_15_0 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, dmac_15_0, clr); + spec->first_prio = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_prio, clr); + spec->first_cfi = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_cfi, clr); + spec->first_vid = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_vid, clr); - spec->ip_protocol = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_protocol); - spec->ip_dscp = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_dscp); - spec->ip_ecn = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_ecn); - spec->cvlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, cvlan_tag); - spec->svlan_tag = 
MLX5_GET(fte_match_set_lyr_2_4, mask, svlan_tag); - spec->frag = MLX5_GET(fte_match_set_lyr_2_4, mask, frag); - spec->ip_version = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_version); - spec->tcp_flags = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_flags); - spec->tcp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_sport); - spec->tcp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_dport); + spec->ip_protocol = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_protocol, clr); + spec->ip_dscp = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_dscp, clr); + spec->ip_ecn = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_ecn, clr); + spec->cvlan_tag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, cvlan_tag, clr); + spec->svlan_tag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, svlan_tag, clr); + spec->frag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, frag, clr); + spec->ip_version = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_version, clr); + spec->tcp_flags = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_flags, clr); + spec->tcp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_sport, clr); + spec->tcp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_dport, clr); - spec->ttl_hoplimit = MLX5_GET(fte_match_set_lyr_2_4, mask, ttl_hoplimit); + spec->ttl_hoplimit = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ttl_hoplimit, clr); - spec->udp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_sport); - spec->udp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_dport); + spec->udp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_sport, clr); + spec->udp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_dport, clr); - memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - sizeof(raw_ip)); + memcpy_and_clear(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip), clr); spec->src_ip_127_96 = be32_to_cpu(raw_ip[0]); spec->src_ip_95_64 = be32_to_cpu(raw_ip[1]); spec->src_ip_63_32 = be32_to_cpu(raw_ip[2]); 
spec->src_ip_31_0 = be32_to_cpu(raw_ip[3]); - memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - sizeof(raw_ip)); + memcpy_and_clear(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip), clr); spec->dst_ip_127_96 = be32_to_cpu(raw_ip[0]); spec->dst_ip_95_64 = be32_to_cpu(raw_ip[1]); @@ -770,104 +785,105 @@ static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec) spec->dst_ip_31_0 = be32_to_cpu(raw_ip[3]); } -static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec) +static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec, bool clr) { spec->outer_first_mpls_label = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_label); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_label, clr); spec->outer_first_mpls_exp = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp, clr); spec->outer_first_mpls_s_bos = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos, clr); spec->outer_first_mpls_ttl = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl, clr); spec->inner_first_mpls_label = - MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_label); + IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_label, clr); spec->inner_first_mpls_exp = - MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp); + IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp, clr); spec->inner_first_mpls_s_bos = - MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos); + IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos, clr); spec->inner_first_mpls_ttl = - MLX5_GET(fte_match_set_misc2, mask, 
inner_first_mpls.mpls_ttl); + IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_ttl, clr); spec->outer_first_mpls_over_gre_label = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label, clr); spec->outer_first_mpls_over_gre_exp = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp, clr); spec->outer_first_mpls_over_gre_s_bos = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos, clr); spec->outer_first_mpls_over_gre_ttl = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl, clr); spec->outer_first_mpls_over_udp_label = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label, clr); spec->outer_first_mpls_over_udp_exp = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp, clr); spec->outer_first_mpls_over_udp_s_bos = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos, clr); spec->outer_first_mpls_over_udp_ttl = - MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl); - spec->metadata_reg_c_7 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_7); - spec->metadata_reg_c_6 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_6); - spec->metadata_reg_c_5 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_5); - spec->metadata_reg_c_4 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_4); - spec->metadata_reg_c_3 = MLX5_GET(fte_match_set_misc2, 
mask, metadata_reg_c_3); - spec->metadata_reg_c_2 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_2); - spec->metadata_reg_c_1 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_1); - spec->metadata_reg_c_0 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_0); - spec->metadata_reg_a = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_a); + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl, clr); + spec->metadata_reg_c_7 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_7, clr); + spec->metadata_reg_c_6 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_6, clr); + spec->metadata_reg_c_5 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_5, clr); + spec->metadata_reg_c_4 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_4, clr); + spec->metadata_reg_c_3 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_3, clr); + spec->metadata_reg_c_2 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_2, clr); + spec->metadata_reg_c_1 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_1, clr); + spec->metadata_reg_c_0 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_0, clr); + spec->metadata_reg_a = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_a, clr); } -static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec) +static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec, bool clr) { - spec->inner_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_seq_num); - spec->outer_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_seq_num); - spec->inner_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_ack_num); - spec->outer_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_ack_num); + spec->inner_tcp_seq_num = IFC_GET_CLR(fte_match_set_misc3, mask, inner_tcp_seq_num, clr); + spec->outer_tcp_seq_num = IFC_GET_CLR(fte_match_set_misc3, mask, outer_tcp_seq_num, clr); + spec->inner_tcp_ack_num = IFC_GET_CLR(fte_match_set_misc3, 
mask, inner_tcp_ack_num, clr); + spec->outer_tcp_ack_num = IFC_GET_CLR(fte_match_set_misc3, mask, outer_tcp_ack_num, clr); spec->outer_vxlan_gpe_vni = - MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_vni); + IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_vni, clr); spec->outer_vxlan_gpe_next_protocol = - MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol); + IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol, clr); spec->outer_vxlan_gpe_flags = - MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_flags); - spec->icmpv4_header_data = MLX5_GET(fte_match_set_misc3, mask, icmp_header_data); + IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_flags, clr); + spec->icmpv4_header_data = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_header_data, clr); spec->icmpv6_header_data = - MLX5_GET(fte_match_set_misc3, mask, icmpv6_header_data); - spec->icmpv4_type = MLX5_GET(fte_match_set_misc3, mask, icmp_type); - spec->icmpv4_code = MLX5_GET(fte_match_set_misc3, mask, icmp_code); - spec->icmpv6_type = MLX5_GET(fte_match_set_misc3, mask, icmpv6_type); - spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code); + IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_header_data, clr); + spec->icmpv4_type = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_type, clr); + spec->icmpv4_code = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_code, clr); + spec->icmpv6_type = IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_type, clr); + spec->icmpv6_code = IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_code, clr); spec->geneve_tlv_option_0_data = - MLX5_GET(fte_match_set_misc3, mask, geneve_tlv_option_0_data); - spec->gtpu_msg_flags = MLX5_GET(fte_match_set_misc3, mask, gtpu_msg_flags); - spec->gtpu_msg_type = MLX5_GET(fte_match_set_misc3, mask, gtpu_msg_type); - spec->gtpu_teid = MLX5_GET(fte_match_set_misc3, mask, gtpu_teid); - spec->gtpu_dw_0 = MLX5_GET(fte_match_set_misc3, mask, gtpu_dw_0); - spec->gtpu_dw_2 = MLX5_GET(fte_match_set_misc3, mask, 
gtpu_dw_2); + IFC_GET_CLR(fte_match_set_misc3, mask, geneve_tlv_option_0_data, clr); + spec->gtpu_teid = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_teid, clr); + spec->gtpu_msg_flags = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_msg_flags, clr); + spec->gtpu_msg_type = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_msg_type, clr); + spec->gtpu_dw_0 = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_dw_0, clr); + spec->gtpu_dw_2 = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_dw_2, clr); spec->gtpu_first_ext_dw_0 = - MLX5_GET(fte_match_set_misc3, mask, gtpu_first_ext_dw_0); + IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_first_ext_dw_0, clr); } -static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec) +static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec, bool clr) { spec->prog_sample_field_id_0 = - MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_0); + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_0, clr); spec->prog_sample_field_value_0 = - MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_0); + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_0, clr); spec->prog_sample_field_id_1 = - MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_1); + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_1, clr); spec->prog_sample_field_value_1 = - MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_1); + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_1, clr); spec->prog_sample_field_id_2 = - MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_2); + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_2, clr); spec->prog_sample_field_value_2 = - MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_2); + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_2, clr); spec->prog_sample_field_id_3 = - MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_3); + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_3, clr); 
spec->prog_sample_field_value_3 = - MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_3); + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_3, clr); } void mlx5dr_ste_copy_param(u8 match_criteria, struct mlx5dr_match_param *set_param, - struct mlx5dr_match_parameters *mask) + struct mlx5dr_match_parameters *mask, + bool clr) { u8 tail_param[MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)] = {}; u8 *data = (u8 *)mask->match_buf; @@ -881,7 +897,7 @@ void mlx5dr_ste_copy_param(u8 match_criteria, } else { buff = mask->match_buf; } - dr_ste_copy_mask_spec(buff, &set_param->outer); + dr_ste_copy_mask_spec(buff, &set_param->outer, clr); } param_location = sizeof(struct mlx5dr_match_spec); @@ -894,7 +910,7 @@ void mlx5dr_ste_copy_param(u8 match_criteria, } else { buff = data + param_location; } - dr_ste_copy_mask_misc(buff, &set_param->misc); + dr_ste_copy_mask_misc(buff, &set_param->misc, clr); } param_location += sizeof(struct mlx5dr_match_misc); @@ -907,7 +923,7 @@ void mlx5dr_ste_copy_param(u8 match_criteria, } else { buff = data + param_location; } - dr_ste_copy_mask_spec(buff, &set_param->inner); + dr_ste_copy_mask_spec(buff, &set_param->inner, clr); } param_location += sizeof(struct mlx5dr_match_spec); @@ -920,7 +936,7 @@ void mlx5dr_ste_copy_param(u8 match_criteria, } else { buff = data + param_location; } - dr_ste_copy_mask_misc2(buff, &set_param->misc2); + dr_ste_copy_mask_misc2(buff, &set_param->misc2, clr); } param_location += sizeof(struct mlx5dr_match_misc2); @@ -934,7 +950,7 @@ void mlx5dr_ste_copy_param(u8 match_criteria, } else { buff = data + param_location; } - dr_ste_copy_mask_misc3(buff, &set_param->misc3); + dr_ste_copy_mask_misc3(buff, &set_param->misc3, clr); } param_location += sizeof(struct mlx5dr_match_misc3); @@ -948,7 +964,7 @@ void mlx5dr_ste_copy_param(u8 match_criteria, } else { buff = data + param_location; } - dr_ste_copy_mask_misc4(buff, &set_param->misc4); + dr_ste_copy_mask_misc4(buff, &set_param->misc4, clr); } } diff 
--git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 3f47d2b3b6e6..3028b776da00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -1230,7 +1230,8 @@ void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_htbl_connect_info *connect_info); void mlx5dr_ste_copy_param(u8 match_criteria, struct mlx5dr_match_param *set_param, - struct mlx5dr_match_parameters *mask); + struct mlx5dr_match_parameters *mask, + bool clear); struct mlx5dr_qp { struct mlx5_core_dev *mdev; From 28e7606fa8f106cdc0355e0548396c037443e063 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 26 Oct 2021 16:01:03 +0300 Subject: [PATCH 06/14] net/mlx5e: Refactor rx handler of representor device Move the ownership of skb forwarding to network stack to the tc update_skb handler as different cases will require different handling of the skb. While the tc handler will take care of the various cases and properly handle the handover of the skb to the network stack and freeing the skb, the main rx handler will be kept clean from branches and usage of flags. 
Signed-off-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/rep/tc.c | 41 ++++++++++++------- .../ethernet/mellanox/mlx5/core/en/rep/tc.h | 13 ++---- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 22 +--------- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 1 - 4 files changed, 32 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 398c6761eeb3..eb960eba6027 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -19,6 +19,7 @@ #include "en/tc_tun.h" #include "lib/port_tun.h" #include "en/tc/sample.h" +#include "en_accel/ipsec_rxtx.h" struct mlx5e_rep_indr_block_priv { struct net_device *netdev; @@ -652,6 +653,12 @@ static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1, return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); } +static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) +{ + if (tc_priv->tun_dev) + dev_put(tc_priv->tun_dev); +} + static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj, struct mlx5e_tc_update_priv *tc_priv) @@ -665,10 +672,10 @@ static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *sk mlx5_rep_tc_post_napi_receive(tc_priv); } -bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, - struct sk_buff *skb, - struct mlx5e_tc_update_priv *tc_priv) +void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, + struct sk_buff *skb) { + struct mlx5e_tc_update_priv tc_priv = {}; struct mlx5_mapped_obj mapped_obj; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; @@ -677,7 +684,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) - return true; + goto forward; /* If 
reg_c0 is not equal to the default flow tag then skb->mark * is not supported and must be reset back to 0. @@ -691,26 +698,30 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, netdev_dbg(priv->netdev, "Couldn't find mapped object for reg_c0: %d, err: %d\n", reg_c0, err); - return false; + goto free_skb; } if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { u32 reg_c1 = be32_to_cpu(cqe->ft_metadata); - return mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, tc_priv); + if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) && + !mlx5_ipsec_is_rx_flow(cqe)) + goto free_skb; } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) { - mlx5e_restore_skb_sample(priv, skb, &mapped_obj, tc_priv); - return false; + mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv); + goto free_skb; } else { netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); - return false; + goto free_skb; } - return true; -} +forward: + napi_gro_receive(rq->cq.napi, skb); -void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) -{ - if (tc_priv->tun_dev) - dev_put(tc_priv->tun_dev); + mlx5_rep_tc_post_napi_receive(&tc_priv); + + return; + +free_skb: + dev_kfree_skb_any(skb); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h index d0661578467b..0a8334d20b3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -36,10 +36,8 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data); -bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, - struct sk_buff *skb, - struct mlx5e_tc_update_priv *tc_priv); -void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv); +void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, + struct sk_buff *skb); #else /* CONFIG_MLX5_CLS_ACT */ @@ 
-67,12 +65,9 @@ mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { return -EOPNOTSUPP; } struct mlx5e_tc_update_priv; -static inline bool -mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, - struct sk_buff *skb, - struct mlx5e_tc_update_priv *tc_priv) { return true; } static inline void -mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) {} +mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, + struct sk_buff *skb) {} #endif /* CONFIG_MLX5_CLS_ACT */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f63c8ff3ef3f..96967b0a2441 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1660,7 +1660,6 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; - struct mlx5e_tc_update_priv tc_priv = {}; struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; struct sk_buff *skb; @@ -1696,15 +1695,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (rep->vlan && skb_vlan_tag_present(skb)) skb_vlan_pop(skb); - if (unlikely(!mlx5_ipsec_is_rx_flow(cqe) && - !mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv))) { - dev_kfree_skb_any(skb); - goto free_wqe; - } - - napi_gro_receive(rq->cq.napi, skb); - - mlx5_rep_tc_post_napi_receive(&tc_priv); + mlx5e_rep_tc_receive(cqe, rq, skb); free_wqe: mlx5e_free_rx_wqe(rq, wi, true); @@ -1721,7 +1712,6 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); u32 page_idx = wqe_offset >> PAGE_SHIFT; - struct mlx5e_tc_update_priv tc_priv = {}; struct mlx5e_rx_wqe_ll *wqe; struct mlx5_wq_ll *wq; struct sk_buff *skb; @@ 
-1754,15 +1744,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); - if (unlikely(!mlx5_ipsec_is_rx_flow(cqe) && - !mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv))) { - dev_kfree_skb_any(skb); - goto mpwrq_cqe_out; - } - - napi_gro_receive(rq->cq.napi, skb); - - mlx5_rep_tc_post_napi_receive(&tc_priv); + mlx5e_rep_tc_receive(cqe, rq, skb); mpwrq_cqe_out: if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3af3da214a5b..f458f7f6b299 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -60,7 +60,6 @@ #include "en/mapping.h" #include "en/tc_ct.h" #include "en/mod_hdr.h" -#include "en/tc_priv.h" #include "en/tc_tun_encap.h" #include "en/tc/sample.h" #include "lib/devcom.h" From 189ce08ebf876df2b51f625877731055475352df Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 26 Oct 2021 20:55:05 +0300 Subject: [PATCH 07/14] net/mlx5e: Use generic name for the forwarding dev pointer Rename tun_dev to fwd_dev within mlx5e_tc_update_priv struct since future implementation may introduce other device types which the handler is forwarding to. 
Signed-off-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index eb960eba6027..de683724e184 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -612,8 +612,8 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, return false; } - /* Set tun_dev so we do dev_put() after datapath */ - tc_priv->tun_dev = dev; + /* Set fwd_dev so we do dev_put() after datapath */ + tc_priv->fwd_dev = dev; skb->dev = dev; @@ -655,8 +655,8 @@ static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1, static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) { - if (tc_priv->tun_dev) - dev_put(tc_priv->tun_dev); + if (tc_priv->fwd_dev) + dev_put(tc_priv->fwd_dev); } static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 1a4cd882f0fb..df0f63c21e72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -56,7 +56,7 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); struct mlx5e_tc_update_priv { - struct net_device *tun_dev; + struct net_device *fwd_dev; }; struct mlx5_nic_flow_attr { From 4f4edcc2b84fecec66748ecbb90a84b981ecdaae Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 29 Apr 2021 09:53:41 +0300 Subject: [PATCH 08/14] net/mlx5: E-Switch, Add ovs internal port mapping to metadata support Adding infrastructure to map ovs internal port device to vport match metadata to support offload of rules with internal port as the 
filter device or as the destination device. The infrastructure allows adding and removing internal port device to an eswitch database and getting a unique vport metadata value to be placed and match on in reg_c0 when offloading rules that are coming from or going to an internal port. The new int port metadata can be written to the source port register in HW to indicate that current source port of the packet is the internal port and not one of the actual HW vports (uplink or VF). Using this method, it is possible to offload TC rules with an OVS internal port as their destination port (overwriting the src vport register) or as the filter port (matching on the value of the src vport register and making sure it matches to the internal port's value). There is also a need to handle a miss case where the packet's src port value was changed in HW to an internal port but a following rule which matches on this new src port value wasn't found in HW. In such case, the packet will be forwarded to the driver with metadata which allows driver to restore the info of the internal port's netdevice. Once this info is restored, the uplink driver can forward the packet to the relevant netdevice in SW. 
Signed-off-by: Ariel Levkovich Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../ethernet/mellanox/mlx5/core/en/rep/tc.c | 43 +- .../ethernet/mellanox/mlx5/core/en/rep/tc.h | 1 - .../mellanox/mlx5/core/en/tc/int_port.c | 457 ++++++++++++++++++ .../mellanox/mlx5/core/en/tc/int_port.h | 65 +++ .../net/ethernet/mellanox/mlx5/core/en_rep.c | 13 +- .../net/ethernet/mellanox/mlx5/core/en_rep.h | 4 + .../net/ethernet/mellanox/mlx5/core/en_tc.c | 4 + .../net/ethernet/mellanox/mlx5/core/en_tc.h | 3 + .../net/ethernet/mellanox/mlx5/core/eswitch.h | 6 + .../mellanox/mlx5/core/eswitch_offloads.c | 18 +- 11 files changed, 607 insertions(+), 9 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index bdb271b604d9..e63bb9ceb9c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ esw/indir_table.o en/tc_tun_encap.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \ - en/tc/post_act.o + en/tc/post_act.o en/tc/int_port.o mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index de683724e184..c69129940268 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -20,6 +20,7 @@ #include "lib/port_tun.h" #include "en/tc/sample.h" #include "en_accel/ipsec_rxtx.h" +#include "en/tc/int_port.h" struct mlx5e_rep_indr_block_priv { struct net_device 
*netdev; @@ -672,12 +673,43 @@ static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *sk mlx5_rep_tc_post_napi_receive(tc_priv); } +static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_mapped_obj *mapped_obj, + struct mlx5e_tc_update_priv *tc_priv, + bool *forward_tx, + u32 reg_c1) +{ + u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + /* Tunnel restore takes precedence over int port restore */ + if (tunnel_id) + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb, + mapped_obj->int_port_metadata, forward_tx)) { + /* Set fwd_dev for future dev_put */ + tc_priv->fwd_dev = skb->dev; + + return true; + } + + return false; +} + void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, struct sk_buff *skb) { + u32 reg_c1 = be32_to_cpu(cqe->ft_metadata); struct mlx5e_tc_update_priv tc_priv = {}; struct mlx5_mapped_obj mapped_obj; struct mlx5_eswitch *esw; + bool forward_tx = false; struct mlx5e_priv *priv; u32 reg_c0; int err; @@ -702,21 +734,26 @@ void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, } if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { - u32 reg_c1 = be32_to_cpu(cqe->ft_metadata); - if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) && !mlx5_ipsec_is_rx_flow(cqe)) goto free_skb; } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) { mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv); goto free_skb; + } else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) { + if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv, + &forward_tx, reg_c1)) + goto free_skb; } else { 
netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); goto free_skb; } forward: - napi_gro_receive(rq->cq.napi, skb); + if (forward_tx) + dev_queue_xmit(skb); + else + napi_gro_receive(rq->cq.napi, skb); mlx5_rep_tc_post_napi_receive(&tc_priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h index 0a8334d20b3b..d6c7c81690eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -64,7 +64,6 @@ static inline int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { return -EOPNOTSUPP; } -struct mlx5e_tc_update_priv; static inline void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, struct sk_buff *skb) {} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c new file mode 100644 index 000000000000..ca834bbcb44f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include +#include "en/mapping.h" +#include "en/tc/int_port.h" +#include "en.h" +#include "en_rep.h" +#include "en_tc.h" + +struct mlx5e_tc_int_port { + enum mlx5e_tc_int_port_type type; + int ifindex; + u32 match_metadata; + u32 mapping; + struct list_head list; + struct mlx5_flow_handle *rx_rule; + refcount_t refcnt; + struct rcu_head rcu_head; +}; + +struct mlx5e_tc_int_port_priv { + struct mlx5_core_dev *dev; + struct mutex int_ports_lock; /* Protects int ports list */ + struct list_head int_ports; /* Uses int_ports_lock */ + u16 num_ports; + bool ul_rep_rx_ready; /* Set when uplink is performing teardown */ + struct mapping_ctx *metadata_mapping; /* Metadata for source port rewrite and matching */ +}; + +bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw) +{ + return mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_GEN(esw->dev, reg_c_preserve); +} + +u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port) +{ + return int_port->match_metadata; +} + +int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port) +{ + /* For egress forwarding we can have the case + * where the packet came from a vport and redirected + * to int port or it came from the uplink, going + * via internal port and hairpinned back to uplink + * so we set the source to any port in this case. + */ + return int_port->type == MLX5E_TC_INT_PORT_EGRESS ? 
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT : + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +} + +u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port) +{ + return int_port->match_metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS); +} + +static struct mlx5_flow_handle * +mlx5e_int_port_create_rx_rule(struct mlx5_eswitch *esw, + struct mlx5e_tc_int_port *int_port, + struct mlx5_flow_destination *dest) + +{ + struct mlx5_flow_context *flow_context; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5e_tc_int_port_get_metadata_for_match(int_port)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + /* Overwrite flow tag with the int port metadata mapping + * instead of the chain mapping. 
+ */ + flow_context = &spec->flow_context; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = int_port->mapping; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, + &flow_act, dest, 1); + if (IS_ERR(flow_rule)) + mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %ld\n", + PTR_ERR(flow_rule)); + + kvfree(spec); + + return flow_rule; +} + +static struct mlx5e_tc_int_port * +mlx5e_int_port_lookup(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type) +{ + struct mlx5e_tc_int_port *int_port; + + if (!priv->ul_rep_rx_ready) + goto not_found; + + list_for_each_entry(int_port, &priv->int_ports, list) + if (int_port->ifindex == ifindex && int_port->type == type) { + refcount_inc(&int_port->refcnt); + return int_port; + } + +not_found: + return NULL; +} + +static int mlx5e_int_port_metadata_alloc(struct mlx5e_tc_int_port_priv *priv, + int ifindex, enum mlx5e_tc_int_port_type type, + u32 *id) +{ + u32 mapped_key[2] = {type, ifindex}; + int err; + + err = mapping_add(priv->metadata_mapping, mapped_key, id); + if (err) + return err; + + /* Fill upper 4 bits of PFNUM with reserved value */ + *id |= 0xf << ESW_VPORT_BITS; + + return 0; +} + +static void mlx5e_int_port_metadata_free(struct mlx5e_tc_int_port_priv *priv, + u32 id) +{ + id &= (1 << ESW_VPORT_BITS) - 1; + mapping_remove(priv->metadata_mapping, id); +} + +/* Must be called with priv->int_ports_lock held */ +static struct mlx5e_tc_int_port * +mlx5e_int_port_add(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type) +{ + struct mlx5_eswitch *esw = priv->dev->priv.eswitch; + struct mlx5_mapped_obj mapped_obj = {}; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_int_port *int_port; + struct mlx5_flow_destination dest; + struct mapping_ctx *ctx; + u32 match_metadata; + u32 mapping; + int err; + + if (priv->num_ports == 
MLX5E_TC_MAX_INT_PORT_NUM) { + mlx5_core_dbg(priv->dev, "Cannot add a new int port, max supported %d", + MLX5E_TC_MAX_INT_PORT_NUM); + return ERR_PTR(-ENOSPC); + } + + int_port = kzalloc(sizeof(*int_port), GFP_KERNEL); + if (!int_port) + return ERR_PTR(-ENOMEM); + + err = mlx5e_int_port_metadata_alloc(priv, ifindex, type, &match_metadata); + if (err) { + mlx5_core_warn(esw->dev, "Cannot add a new internal port, metadata allocation failed for ifindex %d", + ifindex); + goto err_metadata; + } + + /* map metadata to reg_c0 object for miss handling */ + ctx = esw->offloads.reg_c0_obj_pool; + mapped_obj.type = MLX5_MAPPED_OBJ_INT_PORT_METADATA; + mapped_obj.int_port_metadata = match_metadata; + err = mapping_add(ctx, &mapped_obj, &mapping); + if (err) + goto err_map; + + int_port->type = type; + int_port->ifindex = ifindex; + int_port->match_metadata = match_metadata; + int_port->mapping = mapping; + + /* Create a match on internal vport metadata in vport table */ + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = uplink_rpriv->root_ft; + + int_port->rx_rule = mlx5e_int_port_create_rx_rule(esw, int_port, &dest); + if (IS_ERR(int_port->rx_rule)) { + err = PTR_ERR(int_port->rx_rule); + mlx5_core_warn(esw->dev, "Can't add internal port rx rule, err %d", err); + goto err_rx_rule; + } + + refcount_set(&int_port->refcnt, 1); + list_add_rcu(&int_port->list, &priv->int_ports); + priv->num_ports++; + + return int_port; + +err_rx_rule: + mapping_remove(ctx, int_port->mapping); + +err_map: + mlx5e_int_port_metadata_free(priv, match_metadata); + +err_metadata: + kfree(int_port); + + return ERR_PTR(err); +} + +/* Must be called with priv->int_ports_lock held */ +static void +mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv *priv, + struct mlx5e_tc_int_port *int_port) +{ + struct mlx5_eswitch *esw = priv->dev->priv.eswitch; + struct mapping_ctx *ctx; + + ctx = esw->offloads.reg_c0_obj_pool; + + 
list_del_rcu(&int_port->list); + + /* The following parameters are not used by the + * rcu readers of this int_port object so it is + * safe to release them. + */ + if (int_port->rx_rule) + mlx5_del_flow_rules(int_port->rx_rule); + mapping_remove(ctx, int_port->mapping); + mlx5e_int_port_metadata_free(priv, int_port->match_metadata); + kfree_rcu(int_port); + priv->num_ports--; +} + +/* Must be called with rcu_read_lock held */ +static struct mlx5e_tc_int_port * +mlx5e_int_port_get_from_metadata(struct mlx5e_tc_int_port_priv *priv, + u32 metadata) +{ + struct mlx5e_tc_int_port *int_port; + + list_for_each_entry_rcu(int_port, &priv->int_ports, list) + if (int_port->match_metadata == metadata) + return int_port; + + return NULL; +} + +struct mlx5e_tc_int_port * +mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type) +{ + struct mlx5e_tc_int_port *int_port; + + if (!priv) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&priv->int_ports_lock); + + /* Reject request if ul rep not ready */ + if (!priv->ul_rep_rx_ready) { + int_port = ERR_PTR(-EOPNOTSUPP); + goto done; + } + + int_port = mlx5e_int_port_lookup(priv, ifindex, type); + if (int_port) + goto done; + + /* Alloc and add new int port to list */ + int_port = mlx5e_int_port_add(priv, ifindex, type); + +done: + mutex_unlock(&priv->int_ports_lock); + + return int_port; +} + +void +mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv, + struct mlx5e_tc_int_port *int_port) +{ + if (!refcount_dec_and_mutex_lock(&int_port->refcnt, &priv->int_ports_lock)) + return; + + mlx5e_int_port_remove(priv, int_port); + mutex_unlock(&priv->int_ports_lock); +} + +struct mlx5e_tc_int_port_priv * +mlx5e_tc_int_port_init(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_int_port_priv *int_port_priv; + u64 mapping_id; + + if (!mlx5e_tc_int_port_supported(esw)) + return NULL; + + int_port_priv = kzalloc(sizeof(*int_port_priv), 
GFP_KERNEL); + if (!int_port_priv) + return NULL; + + mapping_id = mlx5_query_nic_system_image_guid(priv->mdev); + + int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_INT_PORT, + sizeof(u32) * 2, + (1 << ESW_VPORT_BITS) - 1, true); + if (IS_ERR(int_port_priv->metadata_mapping)) { + mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping of int port offload, err=%ld\n", + PTR_ERR(int_port_priv->metadata_mapping)); + goto err_mapping; + } + + int_port_priv->dev = priv->mdev; + mutex_init(&int_port_priv->int_ports_lock); + INIT_LIST_HEAD(&int_port_priv->int_ports); + + return int_port_priv; + +err_mapping: + kfree(int_port_priv); + + return NULL; +} + +void +mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv) +{ + if (!priv) + return; + + mutex_destroy(&priv->int_ports_lock); + mapping_destroy(priv->metadata_mapping); + kfree(priv); +} + +/* Int port rx rules reside in ul rep rx tables. + * It is possible the ul rep will go down while there are + * still int port rules in its rx table so proper cleanup + * is required to free resources. 
+ */ +void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_tc_int_port_priv *ppriv; + struct mlx5e_rep_priv *uplink_rpriv; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + ppriv = uplink_priv->int_port_priv; + + if (!ppriv) + return; + + mutex_lock(&ppriv->int_ports_lock); + ppriv->ul_rep_rx_ready = true; + mutex_unlock(&ppriv->int_ports_lock); +} + +void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_tc_int_port_priv *ppriv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_int_port *int_port; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + ppriv = uplink_priv->int_port_priv; + + if (!ppriv) + return; + + mutex_lock(&ppriv->int_ports_lock); + + ppriv->ul_rep_rx_ready = false; + + list_for_each_entry(int_port, &ppriv->int_ports, list) { + if (!IS_ERR_OR_NULL(int_port->rx_rule)) + mlx5_del_flow_rules(int_port->rx_rule); + + int_port->rx_rule = NULL; + } + + mutex_unlock(&ppriv->int_ports_lock); +} + +bool +mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv, + struct sk_buff *skb, u32 int_vport_metadata, + bool *forward_tx) +{ + enum mlx5e_tc_int_port_type fwd_type; + struct mlx5e_tc_int_port *int_port; + struct net_device *dev; + int ifindex; + + if (!priv) + return false; + + rcu_read_lock(); + int_port = mlx5e_int_port_get_from_metadata(priv, int_vport_metadata); + if (!int_port) { + rcu_read_unlock(); + mlx5_core_dbg(priv->dev, "Unable to find int port with metadata 0x%.8x\n", + int_vport_metadata); + return false; + } + + ifindex = int_port->ifindex; + fwd_type = int_port->type; + rcu_read_unlock(); + + dev = dev_get_by_index(&init_net, ifindex); + if (!dev) { + 
mlx5_core_dbg(priv->dev, + "Couldn't find internal port device with ifindex: %d\n", + ifindex); + return false; + } + + skb->skb_iif = dev->ifindex; + skb->dev = dev; + + if (fwd_type == MLX5E_TC_INT_PORT_INGRESS) { + skb->pkt_type = PACKET_HOST; + skb_set_redirected(skb, true); + *forward_tx = false; + } else { + skb_reset_network_header(skb); + skb_push_rcsum(skb, skb->mac_len); + skb_set_redirected(skb, false); + *forward_tx = true; + } + + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h new file mode 100644 index 000000000000..e72c79d308d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_INT_PORT_H__ +#define __MLX5_EN_TC_INT_PORT_H__ + +#include "en.h" + +struct mlx5e_tc_int_port; +struct mlx5e_tc_int_port_priv; + +enum mlx5e_tc_int_port_type { + MLX5E_TC_INT_PORT_INGRESS, + MLX5E_TC_INT_PORT_EGRESS, +}; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw); + +struct mlx5e_tc_int_port_priv * +mlx5e_tc_int_port_init(struct mlx5e_priv *priv); +void +mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv); + +void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv); +void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv); + +bool +mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv, + struct sk_buff *skb, u32 int_vport_metadata, + bool *forward_tx); +struct mlx5e_tc_int_port * +mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type); +void +mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv, + struct mlx5e_tc_int_port *int_port); + +u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port); +u32 
mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port); +int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port); +#else /* CONFIG_MLX5_CLS_ACT */ +static inline u32 +mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port) +{ + return 0; +} + +static inline int +mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port) +{ + return 0; +} + +static inline bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw) +{ + return false; +} + +static inline void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv) {} +static inline void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv) {} + +#endif /* CONFIG_MLX5_CLS_ACT */ +#endif /* __MLX5_EN_TC_INT_PORT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 5230e0422cae..e58a9ec42553 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -53,6 +53,7 @@ #define CREATE_TRACE_POINTS #include "diag/en_rep_tracepoint.h" #include "en_accel/ipsec.h" +#include "en/tc/int_port.h" #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) @@ -857,12 +858,22 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv) { + int err; + mlx5e_create_q_counters(priv); - return mlx5e_init_rep_rx(priv); + err = mlx5e_init_rep_rx(priv); + if (err) + goto out; + + mlx5e_tc_int_port_init_rep_rx(priv); + +out: + return err; } static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv) { + mlx5e_tc_int_port_cleanup_rep_rx(priv); mlx5e_cleanup_rep_rx(priv); mlx5e_destroy_q_counters(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 48a203a9e7d9..b01dacb6f527 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -58,6 +58,7 @@ struct mlx5e_neigh_update_table { }; struct mlx5_tc_ct_priv; +struct mlx5_tc_int_port_priv; struct mlx5e_rep_bond; struct mlx5e_tc_tun_encap; struct mlx5e_post_act; @@ -98,6 +99,9 @@ struct mlx5_rep_uplink_priv { /* tc tunneling encapsulation private data */ struct mlx5e_tc_tun_encap *encap; + + /* OVS internal port support */ + struct mlx5e_tc_int_port_priv *int_port_priv; }; struct mlx5e_rep_priv { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f458f7f6b299..2b2caff6c4e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5073,6 +5073,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) MLX5_FLOW_NAMESPACE_FDB, uplink_priv->post_act); + uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev)); + uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act); mapping_id = mlx5_query_nic_system_image_guid(esw->dev); @@ -5120,6 +5122,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) mapping_destroy(uplink_priv->tunnel_mapping); err_tun_mapping: mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); + mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv); mlx5_tc_ct_clean(uplink_priv->ct_priv); netdev_warn(priv->netdev, "Failed to initialize tc (eswitch), err: %d", err); @@ -5140,6 +5143,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) mapping_destroy(uplink_priv->tunnel_mapping); mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); + mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv); mlx5_tc_ct_clean(uplink_priv->ct_priv); mlx5e_tc_post_act_destroy(uplink_priv->post_act); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index df0f63c21e72..74999dcff70b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -38,6 +38,7 @@ #include "eswitch.h" #include "en/tc_ct.h" #include "en/tc_tun.h" +#include "en/tc/int_port.h" #include "en_rep.h" #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff @@ -104,6 +105,8 @@ struct mlx5_rx_tun_attr { #define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16 #define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0) +#define MLX5E_TC_MAX_INT_PORT_NUM (8) + #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) struct tunnel_match_key { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 28467f11f04b..194ba8313d4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -51,6 +51,7 @@ enum mlx5_mapped_obj_type { MLX5_MAPPED_OBJ_CHAIN, MLX5_MAPPED_OBJ_SAMPLE, + MLX5_MAPPED_OBJ_INT_PORT_METADATA, }; struct mlx5_mapped_obj { @@ -63,6 +64,7 @@ struct mlx5_mapped_obj { u32 trunc_size; u32 tunnel_id; } sample; + u32 int_port_metadata; }; }; @@ -88,6 +90,7 @@ enum { MAPPING_TYPE_TUNNEL_ENC_OPTS, MAPPING_TYPE_LABELS, MAPPING_TYPE_ZONE, + MAPPING_TYPE_INT_PORT, }; struct vport_ingress { @@ -336,6 +339,9 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps); +bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); +int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); + /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 0ef126fd6a8e..94da4aca28c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1857,6 +1857,17 @@ static void 
esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) atomic64_set(&esw->user_count, 0); } +static int esw_get_offloads_ft_size(struct mlx5_eswitch *esw) +{ + int nvports; + + nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS; + if (mlx5e_tc_int_port_supported(esw)) + nvports += MLX5E_TC_MAX_INT_PORT_NUM; + + return nvports; +} + static int esw_create_offloads_table(struct mlx5_eswitch *esw) { struct mlx5_flow_table_attr ft_attr = {}; @@ -1871,7 +1882,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -EOPNOTSUPP; } - ft_attr.max_fte = esw->total_vports + MLX5_ESW_MISS_FLOWS; + ft_attr.max_fte = esw_get_offloads_ft_size(esw); ft_attr.prio = 1; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); @@ -1900,7 +1911,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) int nvports; int err = 0; - nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS; + nvports = esw_get_offloads_ft_size(esw); flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -2805,7 +2816,8 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) { u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1; - u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 1; + /* Reserve 0xf for internal port offload */ + u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 2; u32 pf_num; int id; From dbac71f22954276633e525f958994f84a7bd303f Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 11 Jan 2021 21:42:59 +0200 Subject: [PATCH 09/14] net/mlx5e: Accept action skbedit in the tc actions list Setting the skb packet type field to host is usually done when performing forwarding to ingress device. This is required since the receive handling that is used by the redirect to ingress action checks whether the packet doesn't belong to this host and drops the packet in such case. 
In order to be able to offload action redirect ingress, tc offload code needs to accept the skbedit ptype action as well. There's no special handling in HW for such action since it will be followed by a redirect action and therefore, this code only allows us to accept such action in the actions list without performing anything specific in HW for it. Signed-off-by: Ariel Levkovich Reviewed-by: Paul Blakey Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2b2caff6c4e7..3242eba67047 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3856,6 +3856,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->flags |= MLX5_ESW_ATTR_FLAG_ACCEPT; break; + case FLOW_ACTION_PTYPE: + if (act->ptype != PACKET_HOST) { + NL_SET_ERR_MSG_MOD(extack, + "skbedit ptype is only supported with type host"); + return -EOPNOTSUPP; + } + break; case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; From 27484f7170edabbda7b53650cd24d38295cffe60 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Fri, 8 Jan 2021 21:42:39 +0200 Subject: [PATCH 10/14] net/mlx5e: Offload tc rules that redirect to ovs internal port Allow offloading rules that redirect to ovs internal port ingress and egress. To support redirect to ingress device, offloading of REDIRECT_INGRESS action is added. When a tc rule redirects to ovs internal port, the hw rule will overwrite the input vport value in reg_c0 with a new vport metadata value that is mapped for this internal port using the internal port mapping api that is introduced in previous patches.
After that the hw rule will redirect the packet to the root table to continue processing with the new vport metadata value. The new vport metadata value indicates that this packet is now arriving through an internal port and therefore should be processed using rules that apply on the same internal port as the filter device. Therefore, following rules that apply on this internal port will have to match on the same vport metadata value as part of their matching keys to make sure the packet belongs to the internal port. Signed-off-by: Ariel Levkovich Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_priv.h | 2 + .../net/ethernet/mellanox/mlx5/core/en_tc.c | 123 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_tc.h | 6 + .../net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../mellanox/mlx5/core/eswitch_offloads.c | 10 +- .../mlx5/core/eswitch_offloads_termtbl.c | 3 +- 6 files changed, 141 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index d1599b7b944b..8f64f2c8895a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -173,4 +173,6 @@ void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow); +struct mlx5e_tc_int_port_priv * +mlx5e_get_int_port_priv(struct mlx5e_priv *priv); #endif /* __MLX5_EN_TC_PRIV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3242eba67047..21c37a1a4796 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -231,6 +231,23 @@ mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, return err; } +struct mlx5e_tc_int_port_priv * +mlx5e_get_int_port_priv(struct mlx5e_priv 
*priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->int_port_priv; + } + + return NULL; +} + static struct mlx5_tc_ct_priv * get_ct_priv(struct mlx5e_priv *priv) { @@ -1573,6 +1590,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(esw_attr->counter_dev, attr->counter); + if (esw_attr->dest_int_port) + mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port); + if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); @@ -3814,6 +3834,45 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv, return 0; } +int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + int ifindex, + enum mlx5e_tc_int_port_type type, + u32 *action, + int out_index) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5e_tc_int_port_priv *int_port_priv; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_int_port *dest_int_port; + int err; + + parse_attr = attr->parse_attr; + int_port_priv = mlx5e_get_int_port_priv(priv); + + dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type); + if (IS_ERR(dest_int_port)) + return PTR_ERR(dest_int_port); + + err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts, + MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, + mlx5e_tc_int_port_get_metadata(dest_int_port)); + if (err) { + mlx5e_tc_int_port_put(int_port_priv, dest_int_port); + return err; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + esw_attr->dest_int_port = dest_int_port; + esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; + + /* Forward to root fdb for matching against the new source 
vport */ + attr->dest_chain = 0; + + return 0; +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, @@ -3833,6 +3892,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, bool encap = false, decap = false; u32 action = attr->action; int err, i, if_count = 0; + bool ptype_host = false; bool mpls_push = false; if (!flow_action_has_entries(flow_action)) { @@ -3862,6 +3922,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, "skbedit ptype is only supported with type host"); return -EOPNOTSUPP; } + + ptype_host = true; break; case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP | @@ -3926,6 +3988,50 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, break; return -EOPNOTSUPP; + case FLOW_ACTION_REDIRECT_INGRESS: { + struct net_device *out_dev; + + out_dev = act->dev; + if (!out_dev) + return -EOPNOTSUPP; + + if (!netif_is_ovs_master(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to ingress is supported only for OVS internal ports"); + return -EOPNOTSUPP; + } + + if (netif_is_ovs_master(parse_attr->filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to ingress is not supported from internal port"); + return -EOPNOTSUPP; + } + + if (!ptype_host) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to int port ingress requires ptype=host action"); + return -EOPNOTSUPP; + } + + if (esw_attr->out_count) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to int port ingress is supported only as single destination"); + return -EOPNOTSUPP; + } + + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, + MLX5E_TC_INT_PORT_INGRESS, + &action, esw_attr->out_count); + if (err) + return err; + + esw_attr->out_count++; + + break; + } case FLOW_ACTION_REDIRECT: case FLOW_ACTION_MIRRED: { struct mlx5e_priv *out_priv; @@ -4035,6 +4141,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv 
*priv, rpriv = out_priv->ppriv; esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; + esw_attr->out_count++; + } else if (netif_is_ovs_master(out_dev)) { + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, + out_dev->ifindex, + MLX5E_TC_INT_PORT_EGRESS, + &action, + esw_attr->out_count); + if (err) + return err; + esw_attr->out_count++; } else if (parse_attr->filter_dev != priv->netdev) { /* All mlx5 devices are called to configure @@ -4136,6 +4252,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } } + /* If we forward to internal port we can only have 1 dest */ + if (esw_attr->dest_int_port && esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, + "Redirect to internal port should be the only destination"); + return -EOPNOTSUPP; + } + /* always set IP version for indirect table handling */ attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 74999dcff70b..fdb222793027 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -286,6 +286,12 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport); +int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + int ifindex, + enum mlx5e_tc_int_port_type type, + u32 *action, + int out_index); #else /* CONFIG_MLX5_CLS_ACT */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 194ba8313d4d..e3729bc131c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -467,6 +467,7 @@ struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_core_dev *in_mdev; struct mlx5_core_dev *counter_dev; + struct mlx5e_tc_int_port *dest_int_port; int split_count; int out_count; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 94da4aca28c9..8994a2886aa9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -290,8 +290,11 @@ esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest, err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i); if (err) goto err_setup_chain; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_act->pkt_reformat = esw_attr->dests[j].pkt_reformat; + + if (esw_attr->dests[j].pkt_reformat) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[j].pkt_reformat; + } } return 0; @@ -315,7 +318,8 @@ esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) int i; for (i = esw_attr->split_count; i < esw_attr->out_count; i++) - if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + if (esw_attr->dests[i].rep && + mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, esw_attr->dests[i].mdev)) return true; return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 879d78e46e47..d0407b369f6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -229,7 +229,8 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, /* hairpin */ for (i = esw_attr->split_count; i < esw_attr->out_count; i++) - if 
(esw_attr->dests[i].rep && + esw_attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) + return true; return false; From 100ad4e2d75837c9b42f49b3814b4b42ec9ebe46 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Fri, 8 Jan 2021 22:03:48 +0200 Subject: [PATCH 11/14] net/mlx5e: Offload internal port as encap route device When performing encap action, a route lookup is performed to find the routing device the packet should be forwarded to after the encapsulation. This is the device that has the local tunnel ip address. This change adds support to offload an encap rule where the route device ends up being an ovs internal port. In such case, the driver will add a HW rule that will encapsulate the packet with the tunnel header and will overwrite the vport metadata in reg_c0 to the internal port metadata value. Finally, the packet will be forwarded to the root table to be processed again with the indication that it came from an internal port.
Signed-off-by: Ariel Levkovich Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 3 +- .../mellanox/mlx5/core/en/tc_tun_encap.c | 35 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++-- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index d7e613d0139a..c57180d030c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -83,7 +83,8 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, */ *route_dev = dev; if (!netdev_port_same_parent_id(priv->netdev, real_dev) || - dst_is_lag_dev || is_vlan_dev(*route_dev)) + dst_is_lag_dev || is_vlan_dev(*route_dev) || + netif_is_ovs_master(*route_dev)) *out_dev = uplink_dev; else if (mlx5e_eswitch_rep(dev) && mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 1c44c6c345f5..660cca73c36c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -13,6 +13,30 @@ enum { MLX5E_ROUTE_ENTRY_VALID = BIT(0), }; +static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct mlx5e_encap_entry *e, + int out_index) +{ + struct net_device *route_dev; + int err = 0; + + route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex); + + if (!route_dev || !netif_is_ovs_master(route_dev)) + goto out; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex, + MLX5E_TC_INT_PORT_EGRESS, + &attr->action, out_index); + +out: + if (route_dev) + dev_put(route_dev); + + return err; +} + struct mlx5e_route_key { int ip_version; union { @@ -809,6 +833,17 @@ int mlx5e_attach_encap(struct 
mlx5e_priv *priv, if (err) goto out_err; + err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index); + if (err == -EOPNOTSUPP) { + /* If device doesn't support int port offload, + * redirect to uplink vport. + */ + mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n"); + err = 0; + } else if (err) { + goto out_err; + } + flow->encaps[out_index].e = e; list_add(&flow->encaps[out_index].list, &e->flows); flow->encaps[out_index].index = out_index; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 21c37a1a4796..3a82ca79de64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1458,7 +1458,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, goto err_out; if (esw_attr->dests[out_index].flags & - MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) vf_tun = true; out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; @@ -1566,7 +1567,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { if (esw_attr->dests[out_index].flags & - MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) vf_tun = true; if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { mlx5e_detach_encap(priv, flow, out_index); From 166f431ec6beaf472bc2e116a202a127b64779e4 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 29 Apr 2021 10:03:29 +0300 Subject: [PATCH 12/14] net/mlx5e: Add indirect tc offload of ovs internal port Register callbacks for tc blocks of ovs internal port devices. This allows indirect offloading of rules that apply on such devices as the filter device.
In case a rule is added to a tc block of an internal port, the mlx5 driver will implicitly add a matching on the internal port's unique vport metadata value to the rule's matching list. Therefore, only packets that previously hit a rule that redirects to an internal port and got the vport metadata overwritten to the internal port's unique metadata, can match on such indirect rule. Offloading of both ingress and egress tc blocks of internal ports is supported as opposed to other devices where only ingress block offloading is supported. Signed-off-by: Ariel Levkovich Reviewed-by: Paul Blakey Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/rep/tc.c | 32 +++++++++++++---- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 36 +++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../mellanox/mlx5/core/eswitch_offloads.c | 32 ++++++++++++----- 4 files changed, 82 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index c69129940268..fcb0892c08a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -25,6 +25,7 @@ struct mlx5e_rep_indr_block_priv { struct net_device *netdev; struct mlx5e_rep_priv *rpriv; + enum flow_block_binder_type binder_type; struct list_head list; }; @@ -299,14 +300,16 @@ int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) static struct mlx5e_rep_indr_block_priv * mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, - struct net_device *netdev) + struct net_device *netdev, + enum flow_block_binder_type binder_type) { struct mlx5e_rep_indr_block_priv *cb_priv; list_for_each_entry(cb_priv, &rpriv->uplink_priv.tc_indr_block_priv_list, list) - if (cb_priv->netdev == netdev) + if (cb_priv->netdev == netdev && + cb_priv->binder_type == binder_type) return cb_priv; return NULL; @@ -344,9 
+347,13 @@ mlx5e_rep_indr_offload(struct net_device *netdev, static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type, void *type_data, void *indr_priv) { - unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); + unsigned long flags = MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_rep_indr_block_priv *priv = indr_priv; + flags |= (priv->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) ? + MLX5_TC_FLAG(EGRESS) : + MLX5_TC_FLAG(INGRESS); + switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_indr_offload(priv->netdev, type_data, priv, @@ -428,11 +435,14 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, void (*cleanup)(struct flow_block_cb *block_cb)) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool is_ovs_int_port = netif_is_ovs_master(netdev); struct mlx5e_rep_indr_block_priv *indr_priv; struct flow_block_cb *block_cb; if (!mlx5e_tc_tun_device_to_offload(priv, netdev) && - !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev)) { + !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) && + !is_ovs_int_port) { if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev)) return -EOPNOTSUPP; if (!mlx5e_rep_macvlan_mode_supported(netdev)) { @@ -441,7 +451,14 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, } } - if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && + f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + return -EOPNOTSUPP; + + if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port) + return -EOPNOTSUPP; + + if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw)) return -EOPNOTSUPP; f->unlocked_driver_cb = true; @@ -449,7 +466,7 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, switch (f->command) { case FLOW_BLOCK_BIND: - indr_priv = 
mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type); if (indr_priv) return -EEXIST; @@ -459,6 +476,7 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, indr_priv->netdev = netdev; indr_priv->rpriv = rpriv; + indr_priv->binder_type = f->binder_type; list_add(&indr_priv->list, &rpriv->uplink_priv.tc_indr_block_priv_list); @@ -476,7 +494,7 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, return 0; case FLOW_BLOCK_UNBIND: - indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type); if (!indr_priv) return -ENOENT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3a82ca79de64..e11a906d70c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1437,6 +1437,32 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; + if (netif_is_ovs_master(parse_attr->filter_dev)) { + struct mlx5e_tc_int_port *int_port; + + if (attr->chain) { + NL_SET_ERR_MSG_MOD(extack, + "Internal port rule is only supported on chain 0"); + return -EOPNOTSUPP; + } + + if (attr->dest_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Internal port rule offload doesn't support goto action"); + return -EOPNOTSUPP; + } + + int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), + parse_attr->filter_dev->ifindex, + flow_flag_test(flow, EGRESS) ? 
+ MLX5E_TC_INT_PORT_EGRESS : + MLX5E_TC_INT_PORT_INGRESS); + if (IS_ERR(int_port)) + return PTR_ERR(int_port); + + esw_attr->int_port = int_port; + } + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { struct net_device *out_dev; int mirred_ifindex; @@ -1592,6 +1618,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(esw_attr->counter_dev, attr->counter); + if (esw_attr->int_port) + mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port); + if (esw_attr->dest_int_port) mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port); @@ -4254,10 +4283,11 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } } - /* If we forward to internal port we can only have 1 dest */ - if (esw_attr->dest_int_port && esw_attr->out_count > 1) { + /* Forward to/from internal port can only have 1 dest */ + if ((netif_is_ovs_master(parse_attr->filter_dev) || esw_attr->dest_int_port) && + esw_attr->out_count > 1) { NL_SET_ERR_MSG_MOD(extack, - "Redirect to internal port should be the only destination"); + "Rules with internal port can have only one destination"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index e3729bc131c3..42f8ee2e5d9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -468,6 +468,7 @@ struct mlx5_esw_flow_attr { struct mlx5_core_dev *in_mdev; struct mlx5_core_dev *counter_dev; struct mlx5e_tc_int_port *dest_int_port; + struct mlx5e_tc_int_port *int_port; int split_count; int out_count; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8994a2886aa9..f4eaa5893886 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -86,12 +86,18 @@ mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr) { - if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && - attr && attr->in_rep) - spec->flow_context.flow_source = - attr->in_rep->vport == MLX5_VPORT_UPLINK ? - MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK : - MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) || !attr || !attr->in_rep) + return; + + if (attr->int_port) { + spec->flow_context.flow_source = mlx5e_tc_int_port_get_flow_source(attr->int_port); + + return; + } + + spec->flow_context.flow_source = (attr->in_rep->vport == MLX5_VPORT_UPLINK) ? + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK : + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; } /* Actually only the upper 16 bits of reg c0 need to be cleared, but the lower 16 bits @@ -121,6 +127,8 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_eswitch *src_esw, u16 vport) { + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + u32 metadata; void *misc2; void *misc; @@ -130,10 +138,16 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { if (mlx5_esw_indir_table_decap_vport(attr)) vport = mlx5_esw_indir_table_decap_vport(attr); + + if (esw_attr->int_port) + metadata = + mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); + else + metadata = + mlx5_eswitch_get_vport_metadata_for_match(src_esw, vport); + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); - MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_for_match(src_esw, - vport)); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, metadata); misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, From 
5e9942721749fc96b9df4b0545474153316c0571 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 7 Sep 2021 23:30:59 +0300 Subject: [PATCH 13/14] net/mlx5e: Term table handling of internal port rules Adjust termination table logic to handle rules which involve internal port as filter or forwarding device. For cases where the rule forwards from internal port to uplink, always choose to go via termination table. This is because it is not known from where the packet originally arrived to the internal port and it is possible that it came from the uplink itself, in which case a term table is required to perform hairpin. If the packet arrived from a vport, going via term table has no effect. For cases where the rule forwards to an internal port from uplink the rep pointer will point to the uplink rep, avoid going via termination table as it is not required. Signed-off-by: Ariel Levkovich Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index d0407b369f6f..182306bbefaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -220,7 +220,7 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) || mlx5_esw_attr_flags_skip(attr->flags) || - !mlx5_eswitch_offload_is_uplink_port(esw, spec)) + (!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port)) return false; /* push vlan on RX */ @@ -229,7 +229,7 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, /* hairpin */ for (i = esw_attr->split_count; i < esw_attr->out_count; i++) - if (esw_attr->dests[i].rep && + if 
(!esw_attr->dest_int_port && esw_attr->dests[i].rep && esw_attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) return true; From b16eb3c81fe27978afdb2c111908d4d627a88d99 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sat, 9 Jan 2021 01:38:04 +0200 Subject: [PATCH 14/14] net/mlx5: Support internal port as decap route device When performing route device lookup for decap action, support the case of ovs internal port as the lookup result. In such case, an internal port struct is mapped and attached to the flow attributes so that the source port matching of the rule will match on the internal port's metadata value. Signed-off-by: Ariel Levkovich Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 29 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 24 +++++++++++++-- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index c57180d030c7..a5e450973225 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -711,6 +711,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, struct mlx5_flow_attr *flow_attr) { struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr; + struct mlx5e_tc_int_port *int_port; TC_TUN_ROUTE_ATTR_INIT(attr); u16 vport_num; int err = 0; @@ -735,17 +736,25 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, if (err) return err; - if (attr.route_dev->netdev_ops != &mlx5e_netdev_ops || - !mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) - goto out; + if (attr.route_dev->netdev_ops == &mlx5e_netdev_ops && + mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) { + err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num); + if (err) + goto out; - err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num); - if (err) - goto out; - 
- esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value, - misc_parameters.vxlan_vni); - esw_attr->rx_tun_attr->decap_vport = vport_num; + esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.vxlan_vni); + esw_attr->rx_tun_attr->decap_vport = vport_num; + } else if (netif_is_ovs_master(attr.route_dev)) { + int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), + attr.route_dev->ifindex, + MLX5E_TC_INT_PORT_INGRESS); + if (IS_ERR(int_port)) { + err = PTR_ERR(int_port); + goto out; + } + esw_attr->int_port = int_port; + } out: if (flow_attr->tun_ip_version == 4) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e11a906d70c7..835caa1c7b74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1401,6 +1401,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, int err = 0; int out_index; + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + /* We check chain range only for tc flows. * For ft flows, we checked attr->chain was originally 0 and set it to * FDB_FT_CHAIN which is outside tc range. @@ -1426,6 +1429,24 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, err = mlx5e_attach_decap_route(priv, flow); if (err) goto err_out; + + if (!attr->chain && esw_attr->int_port) { + /* If decap route device is internal port, change the + * source vport value in reg_c0 back to uplink just in + * case the rule performs goto chain > 0. If we have a miss + * on chain > 0 we want the metadata regs to hold the + * chain id so SW will resume handling of this packet + * from the proper chain. 
+ */ + u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw, + esw_attr->in_rep->vport); + + err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts, + MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, + metadata); + if (err) + return err; + } } if (flow_flag_test(flow, L3_TO_L2_DECAP)) { @@ -1434,9 +1455,6 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, goto err_out; } - parse_attr = attr->parse_attr; - esw_attr = attr->esw_attr; - if (netif_is_ovs_master(parse_attr->filter_dev)) { struct mlx5e_tc_int_port *int_port;