From b8a92577f4be89427f0b150261626ebcc25c498d Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Mon, 10 Jun 2019 23:38:14 +0000 Subject: [PATCH 01/32] net/mlx5: Increase wait time for fw initialization Firmware FLR happens sequentially, in some cases, like when destroying a VM that had many VFs, may require waiting much longer than 10 seconds. Increase the timeout to 2 minutes, and print a wait countdown status every 20 seconds. Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 61fa1d162d28..8e96c42d3b84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -169,18 +169,28 @@ static struct mlx5_profile profile[] = { #define FW_INIT_TIMEOUT_MILI 2000 #define FW_INIT_WAIT_MS 2 -#define FW_PRE_INIT_TIMEOUT_MILI 10000 +#define FW_PRE_INIT_TIMEOUT_MILI 120000 +#define FW_INIT_WARN_MESSAGE_INTERVAL 20000 -static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, + u32 warn_time_mili) { + unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili); unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); int err = 0; + BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL); + while (fw_initializing(dev)) { if (time_after(jiffies, end)) { err = -EBUSY; break; } + if (warn_time_mili && time_after(jiffies, warn)) { + mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n", + jiffies_to_msecs(end - warn) / 1000); + warn = jiffies + msecs_to_jiffies(warn_time_mili); + } msleep(FW_INIT_WAIT_MS); } @@ -911,7 +921,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) /* wait for firmware to accept initialization segments configurations */ - err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL); if (err) { mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n", FW_PRE_INIT_TIMEOUT_MILI); @@ -924,7 +934,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) return err; } - err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0); if (err) { mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI); From ac35dcd6e4bdfadc90d6566ccf9fdc3b8f5e1b23 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 10 Jun 2019 23:38:16 +0000 Subject: [PATCH 02/32] net/mlx5: E-Switch, Handle representors creation in handler context Unified representors creation in esw_functions_changed context handler. Emulate the esw_function_changed event for FW/HW that does not support this event. Signed-off-by: Vu Pham Reviewed-by: Parav Pandit Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +-- .../mellanox/mlx5/core/eswitch_offloads.c | 89 ++++++++++--------- 2 files changed, 50 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d8935232964a..504c0440b0b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1720,7 +1720,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { struct mlx5_vport *vport; int total_nvports = 0; - u16 vf_nvports = 0; int err; int i, enabled_events; @@ -1739,15 +1738,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); if (mode == SRIOV_OFFLOADS) { - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - err = mlx5_esw_query_functions(esw->dev, &vf_nvports); - if (err) - return err; + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) total_nvports = esw->total_vports; - } else { - vf_nvports = nvfs; + else total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); - } } esw->mode = mode; @@ -1761,7 +1755,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, vf_nvports, total_nvports); + err = esw_offloads_init(esw, nvfs, total_nvports); } if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d6246ee042fa..f843d8a35a2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1436,34 +1436,13 @@ static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) return err; } -static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) -{ - int err; - - /* Special vports must be loaded first. */ - err = __load_reps_special_vport(esw, rep_type); - if (err) - return err; - - err = __load_reps_vf_vport(esw, nvports, rep_type); - if (err) - goto err_vfs; - - return 0; - -err_vfs: - __unload_reps_special_vport(esw, rep_type); - return err; -} - -static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_special_vport(struct mlx5_eswitch *esw) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_all_vport(esw, nvports, rep_type); + err = __load_reps_special_vport(esw, rep_type); if (err) goto err_reps; } @@ -1472,7 +1451,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - __unload_reps_all_vport(esw, nvports, rep_type); + __unload_reps_special_vport(esw, rep_type); return err; } @@ -1811,6 +1790,21 @@ static void esw_functions_changed_event_handler(struct work_struct *work) kfree(host_work); } +static void esw_emulate_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work = + container_of(work, struct mlx5_host_work, work); + struct mlx5_eswitch *esw = host_work->esw; + int err; + + if (esw->esw_funcs.num_vfs) { + err = esw_offloads_load_vf_reps(esw, esw->esw_funcs.num_vfs); + if (err) + esw_warn(esw->dev, "Load vf reps err=%d\n", err); + } + kfree(host_work); +} + static int esw_functions_changed_event(struct notifier_block *nb, unsigned long type, void *data) { @@ -1827,7 +1821,11 @@ static int esw_functions_changed_event(struct notifier_block *nb, host_work->esw = esw; - INIT_WORK(&host_work->work, esw_functions_changed_event_handler); + if (mlx5_eswitch_is_funcs_handler(esw->dev)) + INIT_WORK(&host_work->work, + esw_functions_changed_event_handler); + else + INIT_WORK(&host_work->work, esw_emulate_event_handler); queue_work(esw->work_queue, &host_work->work); return NOTIFY_OK; @@ -1836,13 +1834,14 @@ static int esw_functions_changed_event(struct notifier_block *nb, static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, u16 vf_nvports) { - if (!mlx5_eswitch_is_funcs_handler(esw->dev)) - return; - - MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, - ESW_FUNCTIONS_CHANGED); - mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); - esw->esw_funcs.num_vfs = vf_nvports; + if (mlx5_eswitch_is_funcs_handler(esw->dev)) { + esw->esw_funcs.num_vfs = 0; + MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + } else { + esw->esw_funcs.num_vfs = vf_nvports; + } } static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw) @@ -1863,7 +1862,11 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, if (err) return err; - err = esw_offloads_load_all_reps(esw, vf_nvports); + /* Only load special vports reps. VF reps will be loaded in + * context of functions_changed event handler through real + * or emulated event. + */ + err = esw_offloads_load_special_vport(esw); if (err) goto err_reps; @@ -1873,6 +1876,16 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, mlx5_rdma_enable_roce(esw->dev); + /* Call esw_functions_changed event to load VF reps: + * 1. HW does not support the event then emulate it + * Or + * 2. The event was already notified when num_vfs changed + * and eswitch was in legacy mode + */ + esw_functions_changed_event(&esw->esw_funcs.nb.nb, + MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED, + NULL); + return 0; err_reps: @@ -1901,18 +1914,10 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_cleanup(struct mlx5_eswitch *esw) { - u16 num_vfs; - esw_functions_changed_event_cleanup(esw); - - if (mlx5_eswitch_is_funcs_handler(esw->dev)) - num_vfs = esw->esw_funcs.num_vfs; - else - num_vfs = esw->dev->priv.sriov.num_vfs; - mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_all_reps(esw, num_vfs); + esw_offloads_unload_all_reps(esw, esw->esw_funcs.num_vfs); esw_offloads_steering_cleanup(esw); } From 10ee82cedb62dd716c44ba7a2c458688638873ab Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 10 Jun 2019 23:38:18 +0000 Subject: [PATCH 03/32] net/mlx5: E-Switch, Return raw output for query esw functions Current function only returns host num of VFs, later patch requires other params such as host maximum num of VFs. Return the raw output so that caller can extract info as needed. Signed-off-by: Bodong Wang Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 21 ++----------------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 7 ++++++- .../mellanox/mlx5/core/eswitch_offloads.c | 5 ++++- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 504c0440b0b0..a4df109fbeb7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1686,10 +1686,9 @@ static int eswitch_vport_event(struct notifier_block *nb, return NOTIFY_OK; } -static int query_esw_functions(struct mlx5_core_dev *dev, - u32 *out, int outlen) +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; MLX5_SET(query_esw_functions_in, in, opcode, MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); @@ -1697,22 +1696,6 @@ static int query_esw_functions(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } -int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs) -{ - u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {0}; - int err; - - err = query_esw_functions(dev, out, sizeof(out)); - if (err) - return err; - - *num_vfs = MLX5_GET(query_esw_functions_out, out, - host_params_context.host_num_of_vfs); - esw_debug(dev, "host_num_of_vfs=%d\n", *num_vfs); - - return 0; -} - /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 135d9a29bbdf..e03811be771d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -387,7 +387,7 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); -int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs); +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) @@ -514,6 +514,11 @@ static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } +static inline int +mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen) +{ + return -EOPNOTSUPP; +} #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f843d8a35a2c..1638e4cdeb16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1762,6 +1762,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) static void esw_functions_changed_event_handler(struct work_struct *work) { + u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {}; struct mlx5_host_work *host_work; struct mlx5_eswitch *esw; u16 num_vfs = 0; @@ -1770,7 +1771,9 @@ static void esw_functions_changed_event_handler(struct work_struct *work) host_work = container_of(work, struct mlx5_host_work, work); esw = host_work->esw; - err = mlx5_esw_query_functions(esw->dev, &num_vfs); + err = mlx5_esw_query_functions(esw->dev, out, sizeof(out)); + num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); if (err || num_vfs == esw->esw_funcs.num_vfs) goto out; From 86eec50beaf3a45f6432d491072fa5c54284dbca Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 10 Jun 2019 23:38:19 +0000 Subject: [PATCH 04/32] net/mlx5: Support querying max VFs from device For ECPF with eswitch manager privilege, query the host max VF count by querying the device using query_functions command. With this enhancement: 1. flow steering entries are created only for valid vports based on the max VF count of the PF. 2. Driver only queries cap of valid vport. Eswitch requires the max VFs when doing initialization, so do sr-iov init before eswitch init. Signed-off-by: Bodong Wang Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/main.c | 22 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/sriov.c | 22 +++++++++++++++++++ include/linux/mlx5/driver.h | 7 ++---- include/linux/mlx5/mlx5_ifc.h | 2 +- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8e96c42d3b84..720f65bfe6a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -844,32 +844,32 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_rl_cleanup; } - err = mlx5_eswitch_init(dev); - if (err) { - mlx5_core_err(dev, "Failed to init eswitch %d\n", err); - goto err_mpfs_cleanup; - } - err = mlx5_sriov_init(dev); if (err) { mlx5_core_err(dev, "Failed to init sriov %d\n", err); - goto err_eswitch_cleanup; + goto err_mpfs_cleanup; + } + + err = mlx5_eswitch_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + goto err_sriov_cleanup; } err = mlx5_fpga_init(dev); if (err) { mlx5_core_err(dev, "Failed to init fpga device %d\n", err); - goto err_sriov_cleanup; + goto err_eswitch_cleanup; } dev->tracer = mlx5_fw_tracer_create(dev); return 0; -err_sriov_cleanup: - mlx5_sriov_cleanup(dev); err_eswitch_cleanup: mlx5_eswitch_cleanup(dev->priv.eswitch); +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); err_mpfs_cleanup: mlx5_mpfs_cleanup(dev); err_rl_cleanup: @@ -893,8 +893,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { mlx5_fw_tracer_destroy(dev->tracer); mlx5_fpga_cleanup(dev); - mlx5_sriov_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_sriov_cleanup(dev); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); mlx5_vxlan_destroy(dev->vxlan); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a249b3c3843d..2eecb831c499 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -208,6 +208,27 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) mlx5_device_disable_sriov(dev); } +static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {}; + u16 host_total_vfs; + int err; + + if (mlx5_core_is_ecpf_esw_manager(dev)) { + err = mlx5_esw_query_functions(dev, out, sizeof(out)); + host_total_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_total_vfs); + + /* Old FW doesn't support getting total_vfs from esw func + * but supports getting it from pci_sriov. + */ + if (!err && host_total_vfs) + return host_total_vfs; + } + + return pci_sriov_get_totalvfs(dev->pdev); +} + int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -218,6 +239,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) return 0; total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b5431f7d97cb..64155fe201ee 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -470,6 +470,7 @@ struct mlx5_core_sriov { struct mlx5_vf_context *vfs_ctx; int num_vfs; int enabled_vfs; + u16 max_vfs; }; struct mlx5_fc_stats { @@ -1103,13 +1104,9 @@ static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev) return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists); } -#define MLX5_HOST_PF_MAX_VFS (127u) static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) { - if (mlx5_core_is_ecpf_esw_manager(dev)) - return MLX5_HOST_PF_MAX_VFS; - else - return pci_sriov_get_totalvfs(dev->pdev); + return dev->priv.sriov.max_vfs; } static inline int mlx5_get_gid_table_len(u16 param) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6513b985c5e9..e3c154b573a2 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9711,7 +9711,7 @@ struct mlx5_ifc_host_params_context_bits { u8 reserved_at_8[0x8]; u8 host_num_of_vfs[0x10]; - u8 reserved_at_20[0x10]; + u8 host_total_vfs[0x10]; u8 host_pci_bus[0x10]; u8 reserved_at_40[0x10]; From 081cc2d7fa5828c8214c41b97352245ca5cc0f58 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:21 +0000 Subject: [PATCH 05/32] net/mlx5: Introduce EQ polling budget Multiple EQs may share the same irq in subsequent patches. To avoid starvation, a budget is set per EQ's interrupt handler. Because of this change, it is no longer required to check that MLX5_NUM_SPARE_EQE eqes were polled (to detect that arm is required). It is guaranteed that MLX5_NUM_SPARE_EQE > budget, therefore the handler will arm and exit the handler before all the entries in the eq are polled. In the scenario where the handler is out of budget and there are more EQEs to poll, arming the EQ guarantees that the HW will send another interrupt and the handler will be called again. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 55 ++++++++++---------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 5e9319d3d90c..28defeaca80a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -61,6 +61,16 @@ enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; +/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update + * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is + * used to set the EQ size, budget must be smaller than the EQ size. + */ +enum { + MLX5_EQ_POLLING_BUDGET = 128, +}; + +static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); + struct mlx5_irq_info { cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; @@ -129,11 +139,16 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) struct mlx5_eq_comp *eq_comp = eq_ptr; struct mlx5_eq *eq = eq_ptr; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; u32 cqn = -1; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've * checked the ownership bit. */ @@ -151,20 +166,10 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) } ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); if (cqn != -1) @@ -197,12 +202,16 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; dev = eq->dev; eqt = dev->priv.eq_table; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. @@ -217,20 +226,10 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); return IRQ_HANDLED; From ca390799c2aa03632c294107fa7f647bcbdff428 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:23 +0000 Subject: [PATCH 06/32] net/mlx5: Change interrupt handler to call chain notifier Multiple EQs may share the same IRQ in subsequent patches. Instead of calling the IRQ handler directly, the EQ will register to an atomic chain notfier. The Linux built-in shared IRQ is not used because it forces the caller to disable the IRQ and clear affinity before free_irq() can be called. This patch is the first step in the separation of IRQ and EQ logic. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/odp.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 138 +++++++++++------- .../net/ethernet/mellanox/mlx5/core/lib/eq.h | 9 +- include/linux/mlx5/eq.h | 3 +- 5 files changed, 105 insertions(+), 56 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 40eb8be482e4..a043af7ee366 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -920,6 +920,7 @@ struct mlx5_ib_lb_state { }; struct mlx5_ib_pf_eq { + struct notifier_block irq_nb; struct mlx5_ib_dev *dev; struct mlx5_eq *core; struct work_struct work; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 91507a2e9290..ac40a4fd5598 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1488,9 +1488,11 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq) mlx5_eq_update_ci(eq->core, cc, 1); } -static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr) +static int mlx5_ib_eq_pf_int(struct notifier_block *nb, unsigned long type, + void *data) { - struct mlx5_ib_pf_eq *eq = eq_ptr; + struct mlx5_ib_pf_eq *eq = + container_of(nb, struct mlx5_ib_pf_eq, irq_nb); unsigned long flags; if (spin_trylock_irqsave(&eq->lock, flags)) { @@ -1553,12 +1555,12 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) goto err_mempool; } + eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_PFAULT_IDX, .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, .nent = MLX5_IB_NUM_PF_EQE, - .context = eq, - .handler = mlx5_ib_eq_pf_int + .nb = &eq->irq_nb, }; eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", ¶m); if (IS_ERR(eq->core)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 28defeaca80a..590c0fefaa25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -72,16 +72,16 @@ enum { static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); struct mlx5_irq_info { + struct atomic_notifier_head nh; cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; - void *context; /* dev_id provided to request_irq */ }; struct mlx5_eq_table { struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq cmd_eq; - struct mlx5_eq async_eq; + struct mlx5_eq_async pages_eq; + struct mlx5_eq_async cmd_eq; + struct mlx5_eq_async async_eq; struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; @@ -109,6 +109,31 @@ struct mlx5_eq_table { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) +static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + return &eq_table->irq_info[vecidx]; +} + +static int mlx5_irq_attach_nb(struct mlx5_irq_info *irq, + struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&irq->nh, nb); +} + +static int mlx5_irq_detach_nb(struct mlx5_irq_info *irq, + struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&irq->nh, nb); +} + +static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) +{ + atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -134,10 +159,13 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) return cq; } -static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) +static int mlx5_eq_comp_int(struct notifier_block *nb, + __always_unused unsigned long action, + __always_unused void *data) { - struct mlx5_eq_comp *eq_comp = eq_ptr; - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_comp *eq_comp = + container_of(nb, struct mlx5_eq_comp, irq_nb); + struct mlx5_eq *eq = &eq_comp->core; struct mlx5_eqe *eqe; int num_eqes = 0; u32 cqn = -1; @@ -175,7 +203,7 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) if (cqn != -1) tasklet_schedule(&eq_comp->tasklet_ctx.task); - return IRQ_HANDLED; + return 0; } /* Some architectures don't latch interrupts when they are disabled, so using @@ -189,16 +217,19 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) disable_irq(eq->core.irqn); count_eqe = eq->core.cons_index; - mlx5_eq_comp_int(eq->core.irqn, eq); + mlx5_eq_comp_int(&eq->irq_nb, 0, NULL); count_eqe = eq->core.cons_index - count_eqe; enable_irq(eq->core.irqn); return count_eqe; } -static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) +static int mlx5_eq_async_int(struct notifier_block *nb, + unsigned long action, void *data) { - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_async *eq_async = + container_of(nb, struct mlx5_eq_async, irq_nb); + struct mlx5_eq *eq = &eq_async->core; struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; @@ -232,7 +263,7 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) out: eq_update_ci(eq, 1); - return IRQ_HANDLED; + return 0; } static void init_eq_buf(struct mlx5_eq *eq) @@ -254,6 +285,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; + struct mlx5_irq_info *irq_info; u8 vecidx = param->index; __be64 *pas; void *eqc; @@ -261,9 +293,6 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, u32 *in; int err; - if (eq_table->irq_info[vecidx].context) - return -EEXIST; - /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); spin_lock_init(&cq_table->lock); @@ -306,24 +335,31 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, if (err) goto err_in; - snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", - name, pci_name(dev->pdev)); - eq_table->irq_info[vecidx].context = param->context; + irq_info = mlx5_irq_get(dev, vecidx); + ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); + snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, param->handler, 0, - eq_table->irq_info[vecidx].name, param->context); + eq->irq_nb = param->nb; + + err = request_irq(eq->irqn, mlx5_irq_int_handler, 0, irq_info->name, + &irq_info->nh); if (err) goto err_eq; - err = mlx5_debug_eq_add(dev, eq); + err = mlx5_irq_attach_nb(irq_info, param->nb); if (err) goto err_irq; + err = mlx5_debug_eq_add(dev, eq); + if (err) + goto err_detach; + /* EQs are created in ARMED state */ eq_update_ci(eq, 1); @@ -331,8 +367,11 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, kvfree(in); return 0; +err_detach: + mlx5_irq_detach_nb(irq_info, param->nb); + err_irq: - free_irq(eq->irqn, eq); + free_irq(eq->irqn, &eq_table->irq_info[vecidx].nh); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -355,9 +394,11 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) mlx5_debug_eq_remove(dev, eq); - free_irq(eq->irqn, irq_info->context); - irq_info->context = NULL; - + err = mlx5_irq_detach_nb(irq_info, eq->irq_nb); + if (err) + mlx5_core_warn(eq->dev, "eq failed to detach from irq. err %d", + err); + free_irq(eq->irqn, &eq_table->irq_info[eq->vecidx].nh); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", @@ -479,7 +520,7 @@ static int cq_err_event_notifier(struct notifier_block *nb, /* type == MLX5_EVENT_TYPE_CQ_ERROR */ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); - eq = &eqt->async_eq; + eq = &eqt->async_eq.core; eqe = data; cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; @@ -548,14 +589,14 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); + table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_CMD_IDX, .mask = 1ull << MLX5_EVENT_TYPE_CMD, .nent = MLX5_NUM_CMD_EQE, - .context = &table->cmd_eq, - .handler = mlx5_eq_async_int, + .nb = &table->cmd_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, ¶m); + err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; @@ -563,27 +604,29 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_events(dev); + table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_ASYNC_IDX, .mask = gather_async_events_mask(dev), .nent = MLX5_NUM_ASYNC_EQE, - .context = &table->async_eq, - .handler = mlx5_eq_async_int, + .nb = &table->async_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, ¶m); + err = create_async_eq(dev, "mlx5_async_eq", + &table->async_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; } + table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_PAGEREQ_IDX, .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, .nent = /* TODO: sriov max_vf + */ 1, - .context = &table->pages_eq, - .handler = mlx5_eq_async_int, + .nb = &table->pages_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, ¶m); + err = create_async_eq(dev, "mlx5_pages_eq", + &table->pages_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; @@ -592,11 +635,11 @@ static int create_async_eqs(struct mlx5_core_dev *dev) return err; err2: - destroy_async_eq(dev, &table->async_eq); + destroy_async_eq(dev, &table->async_eq.core); err1: mlx5_cmd_use_polling(dev); - destroy_async_eq(dev, &table->cmd_eq); + destroy_async_eq(dev, &table->cmd_eq.core); err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; @@ -607,19 +650,19 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; - err = destroy_async_eq(dev, &table->pages_eq); + err = destroy_async_eq(dev, &table->pages_eq.core); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = destroy_async_eq(dev, &table->async_eq); + err = destroy_async_eq(dev, &table->async_eq.core); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = destroy_async_eq(dev, &table->cmd_eq); + err = destroy_async_eq(dev, &table->cmd_eq.core); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -629,17 +672,17 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) { - return &dev->priv.eq_table->async_eq; + return &dev->priv.eq_table->async_eq.core; } void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->async_eq.irqn); + synchronize_irq(dev->priv.eq_table->async_eq.core.irqn); } void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); + synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn); } /* Generic EQ API for mlx5_core consumers @@ -837,12 +880,12 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { .index = vecidx, .mask = 0, .nent = nent, - .context = &eq->core, - .handler = mlx5_eq_comp_int + .nb = &eq->irq_nb, }; err = create_map_eq(dev, &eq->core, name, ¶m); if (err) { @@ -940,10 +983,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; for (i = max_eqs - 1; i >= 0; i--) { - if (!table->irq_info[i].context) - continue; - free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); - table->irq_info[i].context = NULL; + free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); } mutex_unlock(&table->lock); pci_free_irq_vectors(dev->pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index c0fb6d72b695..adbc228bd55d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -34,10 +34,17 @@ struct mlx5_eq { u8 eqn; int nent; struct mlx5_rsc_debug *dbg; + struct notifier_block *irq_nb; /* For destroy only */ +}; + +struct mlx5_eq_async { + struct mlx5_eq core; + struct notifier_block irq_nb; }; struct mlx5_eq_comp { - struct mlx5_eq core; /* Must be first */ + struct mlx5_eq core; + struct notifier_block irq_nb; struct mlx5_eq_tasklet tasklet_ctx; struct list_head list; }; diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 00045cc4ea11..7909f1ff197c 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -26,8 +26,7 @@ struct mlx5_eq_param { u8 index; int nent; u64 mask; - void *context; - irq_handler_t handler; + struct notifier_block *nb; }; struct mlx5_eq * From 24163189da487b4caa751eef4e945c9333aae441 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:25 +0000 Subject: [PATCH 07/32] net/mlx5: Separate IRQ request/free from EQ life cycle Instead of requesting IRQ with eq creation, IRQs will be requested before EQ table creation. Instead of freeing the IRQs after EQ destroy, free IRQs after eq table destroy. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/odp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 121 +++++++++++++------ include/linux/mlx5/eq.h | 3 +- 3 files changed, 84 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ac40a4fd5598..7ce7c5bfe685 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1562,7 +1562,7 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) .nent = MLX5_IB_NUM_PF_EQE, .nb = &eq->irq_nb, }; - eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", ¶m); + eq->core = mlx5_eq_create_generic(dev->mdev, ¶m); if (IS_ERR(eq->core)) { err = PTR_ERR(eq->core); goto err_wq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 590c0fefaa25..f187169cbe76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -134,6 +134,64 @@ static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) return IRQ_HANDLED; } +static void irq_set_name(char *name, int vecidx) +{ + switch (vecidx) { + case MLX5_EQ_CMD_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_cmd_eq"); + break; + case MLX5_EQ_ASYNC_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async_eq"); + break; + case MLX5_EQ_PAGEREQ_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_pages_eq"); + break; + case MLX5_EQ_PFAULT_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_ib_page_fault_eq"); + break; + default: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_EQ_VEC_COMP_BASE); + break; + } +} + +static int request_irqs(struct mlx5_core_dev *dev, int nvec) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *eq_table; + char name[MLX5_MAX_IRQ_NAME]; + int err; + int i; + + eq_table = priv->eq_table; + for (i = 0; i < nvec; i++) { + struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + int irqn = pci_irq_vector(dev->pdev, i); + + irq_set_name(name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); + snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq_info->name, + &irq_info->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq\n"); + goto err_request_irq; + } + } + return 0; + +err_request_irq: + for (; i >= 0; i--) { + struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + int irqn = pci_irq_vector(dev->pdev, i); + + free_irq(irqn, &irq_info->nh); + } + return err; +} + static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -278,14 +336,12 @@ static void init_eq_buf(struct mlx5_eq *eq) } static int -create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - struct mlx5_irq_info *irq_info; u8 vecidx = param->index; __be64 *pas; void *eqc; @@ -335,11 +391,6 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, if (err) goto err_in; - irq_info = mlx5_irq_get(dev, vecidx); - ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); - snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, - "%s@pci:%s", name, pci_name(dev->pdev)); - eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); @@ -347,15 +398,10 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; eq->irq_nb = param->nb; - err = request_irq(eq->irqn, mlx5_irq_int_handler, 0, irq_info->name, - &irq_info->nh); + err = mlx5_irq_attach_nb(mlx5_irq_get(dev, vecidx), param->nb); if (err) goto err_eq; - err = mlx5_irq_attach_nb(irq_info, param->nb); - if (err) - goto err_irq; - err = mlx5_debug_eq_add(dev, eq); if (err) goto err_detach; @@ -368,10 +414,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, return 0; err_detach: - mlx5_irq_detach_nb(irq_info, param->nb); - -err_irq: - free_irq(eq->irqn, &eq_table->irq_info[vecidx].nh); + mlx5_irq_detach_nb(mlx5_irq_get(dev, vecidx), eq->irq_nb); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -386,19 +429,14 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; - struct mlx5_irq_info *irq_info; int err; - irq_info = &eq_table->irq_info[eq->vecidx]; - mlx5_debug_eq_remove(dev, eq); - err = mlx5_irq_detach_nb(irq_info, eq->irq_nb); + err = mlx5_irq_detach_nb(mlx5_irq_get(dev, eq->vecidx), eq->irq_nb); if (err) mlx5_core_warn(eq->dev, "eq failed to detach from irq. err %d", err); - free_irq(eq->irqn, &eq_table->irq_info[eq->vecidx].nh); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", @@ -479,7 +517,7 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ -static int create_async_eq(struct mlx5_core_dev *dev, const char *name, +static int create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; @@ -491,7 +529,7 @@ static int create_async_eq(struct mlx5_core_dev *dev, const char *name, goto unlock; } - err = create_map_eq(dev, eq, name, param); + err = create_map_eq(dev, eq, param); unlock: mutex_unlock(&eq_table->lock); return err; @@ -596,7 +634,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = MLX5_NUM_CMD_EQE, .nb = &table->cmd_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq.core, ¶m); + err = create_async_eq(dev, &table->cmd_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; @@ -611,8 +649,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = MLX5_NUM_ASYNC_EQE, .nb = &table->async_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_async_eq", - &table->async_eq.core, ¶m); + err = create_async_eq(dev, &table->async_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; @@ -625,8 +662,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = /* TODO: sriov max_vf + */ 1, .nb = &table->pages_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_pages_eq", - &table->pages_eq.core, ¶m); + err = create_async_eq(dev, &table->pages_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; @@ -689,7 +725,7 @@ void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) * Needed For RDMA ODP EQ for now */ struct mlx5_eq * -mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, +mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param) { struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); @@ -698,7 +734,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, if (!eq) return ERR_PTR(-ENOMEM); - err = create_async_eq(dev, name, eq, param); + err = create_async_eq(dev, eq, param); if (err) { kvfree(eq); eq = ERR_PTR(err); @@ -845,7 +881,6 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq_comp *eq; int ncomp_vec; int nent; @@ -879,7 +914,6 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) #ifdef CONFIG_RFS_ACCEL irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { .index = vecidx, @@ -887,7 +921,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) .nent = nent, .nb = &eq->irq_nb, }; - err = create_map_eq(dev, &eq->core, name, ¶m); + err = create_map_eq(dev, &eq->core, ¶m); if (err) { kfree(eq); goto clean; @@ -1018,8 +1052,14 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + err = request_irqs(dev, nvec); + if (err) + goto err_free_irqs; + return 0; +err_free_irqs: + pci_free_irq_vectors(dev->pdev); err_free_irq_info: kfree(table->irq_info); return err; @@ -1027,10 +1067,13 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) static void free_irq_vectors(struct mlx5_core_dev *dev) { - struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *table = dev->priv.eq_table; + int i; + for (i = 0; i < table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; i++) + free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); pci_free_irq_vectors(dev->pdev); - kfree(priv->eq_table->irq_info); + kfree(table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) @@ -1039,7 +1082,7 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) err = alloc_irq_vectors(dev); if (err) { - mlx5_core_err(dev, "alloc irq vectors failed\n"); + mlx5_core_err(dev, "Failed to create IRQ vectors\n"); return err; } diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 7909f1ff197c..73ab658af764 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -30,8 +30,7 @@ struct mlx5_eq_param { }; struct mlx5_eq * -mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, - struct mlx5_eq_param *param); +mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param); int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq); From 561aa15ad69e9d1e5a8bb277adb3209bf8091ecb Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:27 +0000 Subject: [PATCH 08/32] net/mlx5: Separate IRQ data from EQ table data IRQ table should only exist for mlx5_core_dev for PF and VF only. EQ table of mediated devices should hold a pointer to the IRQ table of the parent PCI device. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 125 ++++++++++++------ .../net/ethernet/mellanox/mlx5/core/main.c | 11 +- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 3 + include/linux/mlx5/driver.h | 3 + 4 files changed, 98 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index f187169cbe76..cdfa35ec02fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -77,6 +77,14 @@ struct mlx5_irq_info { char name[MLX5_MAX_IRQ_NAME]; }; +struct mlx5_irq_table { + struct mlx5_irq_info *irq_info; + int nvec; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + struct mlx5_eq_table { struct list_head comp_eqs_list; struct mlx5_eq_async pages_eq; @@ -89,11 +97,8 @@ struct mlx5_eq_table { struct mlx5_nb cq_err_nb; struct mutex lock; /* sync async eqs creations */ - int num_comp_vectors; - struct mlx5_irq_info *irq_info; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif + int num_comp_eqs; + struct mlx5_irq_table *irq_table; }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -109,11 +114,33 @@ struct mlx5_eq_table { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.irq_table); +} + +static int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) +{ + return table->nvec - MLX5_EQ_VEC_COMP_BASE; +} + static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; + struct mlx5_irq_table *irq_table = dev->priv.irq_table; - return &eq_table->irq_info[vecidx]; + return &irq_table->irq_info[vecidx]; } static int mlx5_irq_attach_nb(struct mlx5_irq_info *irq, @@ -158,15 +185,12 @@ static void irq_set_name(char *name, int vecidx) static int request_irqs(struct mlx5_core_dev *dev, int nvec) { - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *eq_table; char name[MLX5_MAX_IRQ_NAME]; int err; int i; - eq_table = priv->eq_table; for (i = 0; i < nvec; i++) { - struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); irq_set_name(name, i); @@ -184,7 +208,7 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec) err_request_irq: for (; i >= 0; i--) { - struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); free_irq(irqn, &irq_info->nh); @@ -501,6 +525,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + eq_table->irq_table = dev->priv.irq_table; return 0; kvfree_eq_table: @@ -796,10 +821,13 @@ EXPORT_SYMBOL(mlx5_eq_update_ci); static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { - struct mlx5_priv *priv = &mdev->priv; int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + struct mlx5_priv *priv = &mdev->priv; + struct mlx5_irq_info *irq_info; + int irq; + + irq_info = mlx5_irq_get(mdev, vecidx); + irq = pci_irq_vector(mdev->pdev, vecidx); if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); @@ -819,20 +847,22 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + struct mlx5_irq_info *irq_info; + int irq; + irq_info = mlx5_irq_get(mdev, vecidx); + irq = pci_irq_vector(mdev->pdev, vecidx); irq_set_affinity_hint(irq, NULL); free_cpumask_var(irq_info->mask); } static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) { + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); int err; int i; - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { + for (i = 0; i < nvec; i++) { err = set_comp_irq_affinity_hint(mdev, i); if (err) goto err_out; @@ -849,9 +879,10 @@ static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) { + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); int i; - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) + for (i = 0; i < nvec; i++) clear_comp_irq_affinity_hint(mdev, i); } @@ -863,9 +894,9 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; + if (table->irq_table->rmap) { + free_irq_cpu_rmap(table->irq_table->rmap); + table->irq_table->rmap = NULL; } #endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { @@ -882,20 +913,20 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq; - int ncomp_vec; + int ncomp_eqs; int nent; int err; int i; INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; + ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; #ifdef CONFIG_RFS_ACCEL - table->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!table->rmap) + table->irq_table->rmap = alloc_irq_cpu_rmap(ncomp_eqs); + if (!table->irq_table->rmap) return -ENOMEM; #endif - for (i = 0; i < ncomp_vec; i++) { + for (i = 0; i < ncomp_eqs; i++) { int vecidx = i + MLX5_EQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; @@ -912,7 +943,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) (unsigned long)&eq->tasklet_ctx); #ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); + irq_cpu_rmap_add(table->irq_table->rmap, + pci_irq_vector(dev->pdev, vecidx)); #endif eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { @@ -967,22 +999,23 @@ EXPORT_SYMBOL(mlx5_vector2eqn); unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->num_comp_vectors; + return dev->priv.eq_table->num_comp_eqs; } EXPORT_SYMBOL(mlx5_comp_vectors_count); struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - /* TODO: consider irq_get_affinity_mask(irq) */ - return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; + int vecidx = vector + MLX5_EQ_VEC_COMP_BASE; + + return dev->priv.eq_table->irq_table->irq_info[vecidx].mask; } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->rmap; + return dev->priv.eq_table->irq_table->rmap; } #endif @@ -1008,16 +1041,17 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; + if (table->irq_table->rmap) { + free_irq_cpu_rmap(table->irq_table->rmap); + table->irq_table->rmap = NULL; } #endif mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; + max_eqs = table->num_comp_eqs + MLX5_EQ_VEC_COMP_BASE; for (i = max_eqs - 1; i >= 0; i--) { - free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); } mutex_unlock(&table->lock); pci_free_irq_vectors(dev->pdev); @@ -1026,7 +1060,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) static int alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = priv->eq_table; + struct mlx5_irq_table *table = priv->irq_table; int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? MLX5_CAP_GEN(dev, max_num_eqs) : 1 << MLX5_CAP_GEN(dev, log_max_eq); @@ -1050,7 +1084,7 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) goto err_free_irq_info; } - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + table->nvec = nvec; err = request_irqs(dev, nvec); if (err) @@ -1067,17 +1101,19 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) static void free_irq_vectors(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_irq_table *table = dev->priv.irq_table; int i; - for (i = 0; i < table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; i++) - free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); pci_free_irq_vectors(dev->pdev); kfree(table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; err = alloc_irq_vectors(dev); @@ -1086,6 +1122,9 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) return err; } + eq_table->num_comp_eqs = + mlx5_irq_get_num_comp(eq_table->irq_table); + err = create_async_eqs(dev); if (err) { mlx5_core_err(dev, "Failed to create async EQs\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 720f65bfe6a9..be79dceea3c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -804,10 +804,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_devcom; } + err = mlx5_irq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize irq table\n"); + goto err_devcom; + } + err = mlx5_eq_table_init(dev); if (err) { mlx5_core_err(dev, "failed to initialize eq\n"); - goto err_devcom; + goto err_irq_cleanup; } err = mlx5_events_init(dev); @@ -883,6 +889,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); +err_irq_cleanup: + mlx5_irq_table_cleanup(dev); err_devcom: mlx5_devcom_unregister_device(dev->priv.devcom); @@ -905,6 +913,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); + mlx5_irq_table_cleanup(dev); mlx5_devcom_unregister_device(dev->priv.devcom); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 22e69d4813e4..907515f3bfbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -153,6 +153,9 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_irq_table_init(struct mlx5_core_dev *dev); +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); + int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); void mlx5_events_start(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 64155fe201ee..d8ab633406c2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -492,6 +492,7 @@ struct mlx5_eswitch; struct mlx5_lag; struct mlx5_devcom; struct mlx5_eq_table; +struct mlx5_irq_table; struct mlx5_rate_limit { u32 rate; @@ -521,6 +522,8 @@ struct mlx5_core_roce { }; struct mlx5_priv { + /* IRQ table valid only for real pci devices PF or VF */ + struct mlx5_irq_table *irq_table; struct mlx5_eq_table *eq_table; /* pages stuff */ From b79e6beb9c36a1f26116a9a576392647643ac456 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:28 +0000 Subject: [PATCH 09/32] net/mlx5: Move IRQ rmap creation to IRQ allocation phase Rmap creation/deletion is part of the IRQ life-cycle. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 82 ++++++++++++++------ 1 file changed, 57 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index cdfa35ec02fa..1ea983c1ec05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -216,6 +216,49 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec) return err; } +static void irq_clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + free_irq_cpu_rmap(irq_table->rmap); +#endif +} + +static int irq_set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = mdev->priv.irq_table; + int num_affinity_vec; + int vecidx; + + num_affinity_vec = mlx5_irq_get_num_comp(irq_table); + irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec); + if (!irq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "failed to allocate cpu_rmap. err %d", err); + goto err_out; + } + + vecidx = MLX5_EQ_VEC_COMP_BASE; + for (; vecidx < irq_table->nvec; vecidx++) { + err = irq_cpu_rmap_add(irq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d", err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + irq_clear_rmap(mdev); +err_out: +#endif + return err; +} + static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -893,12 +936,6 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); -#ifdef CONFIG_RFS_ACCEL - if (table->irq_table->rmap) { - free_irq_cpu_rmap(table->irq_table->rmap); - table->irq_table->rmap = NULL; - } -#endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); if (destroy_unmap_eq(dev, &eq->core)) @@ -921,11 +958,6 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&table->comp_eqs_list); ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; -#ifdef CONFIG_RFS_ACCEL - table->irq_table->rmap = alloc_irq_cpu_rmap(ncomp_eqs); - if (!table->irq_table->rmap) - return -ENOMEM; -#endif for (i = 0; i < ncomp_eqs; i++) { int vecidx = i + MLX5_EQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; @@ -942,10 +974,6 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, (unsigned long)&eq->tasklet_ctx); -#ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(table->irq_table->rmap, - pci_irq_vector(dev->pdev, vecidx)); -#endif eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { .index = vecidx, @@ -1039,14 +1067,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) int i, max_eqs; clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->irq_table->rmap) { - free_irq_cpu_rmap(table->irq_table->rmap); - table->irq_table->rmap = NULL; - } -#endif - + irq_clear_rmap(dev); mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ max_eqs = table->num_comp_eqs + MLX5_EQ_VEC_COMP_BASE; for (i = max_eqs - 1; i >= 0; i--) { @@ -1086,13 +1107,19 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) table->nvec = nvec; + err = irq_set_rmap(dev); + if (err) + goto err_set_rmap; + err = request_irqs(dev, nvec); if (err) - goto err_free_irqs; + goto err_request_irqs; return 0; -err_free_irqs: +err_request_irqs: + irq_clear_rmap(dev); +err_set_rmap: pci_free_irq_vectors(dev->pdev); err_free_irq_info: kfree(table->irq_info); @@ -1104,6 +1131,11 @@ static void free_irq_vectors(struct mlx5_core_dev *dev) struct mlx5_irq_table *table = dev->priv.irq_table; int i; + /* free_irq requires that affinity and rmap will be cleared + * before calling it. This is why there is asymmetry with set_rmap + * which should be called after alloc_irq but before request_irq. + */ + irq_clear_rmap(dev); for (i = 0; i < table->nvec; i++) free_irq(pci_irq_vector(dev->pdev, i), &mlx5_irq_get(dev, i)->nh); From 90426cc00c77e4c11f1d23799de44ecb54e8ff27 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:30 +0000 Subject: [PATCH 10/32] net/mlx5: Move IRQ affinity set to IRQ allocation phase Affinity set/clear is part of the IRQ life-cycle. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 25 +++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 1ea983c1ec05..d30bd01cf050 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -934,8 +934,6 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; - clear_comp_irqs_affinity_hints(dev); - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); if (destroy_unmap_eq(dev, &eq->core)) @@ -991,12 +989,6 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) list_add_tail(&eq->list, &table->comp_eqs_list); } - err = set_comp_irq_affinity_hints(dev); - if (err) { - mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); - goto clean; - } - return 0; clean: @@ -1078,6 +1070,16 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) pci_free_irq_vectors(dev->pdev); } +static void unrequest_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); +} + static int alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; @@ -1115,8 +1117,14 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) if (err) goto err_request_irqs; + err = set_comp_irq_affinity_hints(dev); + if (err) + goto err_set_affinity; + return 0; +err_set_affinity: + unrequest_irqs(dev); err_request_irqs: irq_clear_rmap(dev); err_set_rmap: @@ -1136,6 +1144,7 @@ static void free_irq_vectors(struct mlx5_core_dev *dev) * which should be called after alloc_irq but before request_irq. */ irq_clear_rmap(dev); + clear_comp_irqs_affinity_hints(dev); for (i = 0; i < table->nvec; i++) free_irq(pci_irq_vector(dev->pdev, i), &mlx5_irq_get(dev, i)->nh); From e1706e62801e9ad65b1fb6e6eccc69acfa43d16d Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:32 +0000 Subject: [PATCH 11/32] net/mlx5: Separate IRQ table creation from EQ table creation IRQ allocation should be part of the IRQ table life-cycle. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 22 +++---------------- .../net/ethernet/mellanox/mlx5/core/main.c | 9 ++++++++ .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 ++ 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index d30bd01cf050..daf9bc3155cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -1056,18 +1056,10 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int i, max_eqs; - clear_comp_irqs_affinity_hints(dev); - irq_clear_rmap(dev); mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - max_eqs = table->num_comp_eqs + MLX5_EQ_VEC_COMP_BASE; - for (i = max_eqs - 1; i >= 0; i--) { - free_irq(pci_irq_vector(dev->pdev, i), - &mlx5_irq_get(dev, i)->nh); - } + mlx5_irq_table_destroy(dev); mutex_unlock(&table->lock); - pci_free_irq_vectors(dev->pdev); } static void unrequest_irqs(struct mlx5_core_dev *dev) @@ -1080,7 +1072,7 @@ static void unrequest_irqs(struct mlx5_core_dev *dev) &mlx5_irq_get(dev, i)->nh); } -static int alloc_irq_vectors(struct mlx5_core_dev *dev) +int mlx5_irq_table_create(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_irq_table *table = priv->irq_table; @@ -1134,7 +1126,7 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) return err; } -static void free_irq_vectors(struct mlx5_core_dev *dev) +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) { struct mlx5_irq_table *table = dev->priv.irq_table; int i; @@ -1157,12 +1149,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; - err = alloc_irq_vectors(dev); - if (err) { - mlx5_core_err(dev, "Failed to create IRQ vectors\n"); - return err; - } - eq_table->num_comp_eqs = mlx5_irq_get_num_comp(eq_table->irq_table); @@ -1182,7 +1168,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) err_comp_eqs: destroy_async_eqs(dev); err_async_eqs: - free_irq_vectors(dev); return err; } @@ -1190,7 +1175,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { destroy_comp_eqs(dev); destroy_async_eqs(dev); - free_irq_vectors(dev); } int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index be79dceea3c3..bfc8c6faedc2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1047,6 +1047,12 @@ static int mlx5_load(struct mlx5_core_dev *dev) mlx5_events_start(dev); mlx5_pagealloc_start(dev); + err = mlx5_irq_table_create(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc IRQs\n"); + goto err_irq_table; + } + err = mlx5_eq_table_create(dev); if (err) { mlx5_core_err(dev, "Failed to create EQs\n"); @@ -1118,6 +1124,8 @@ static int mlx5_load(struct mlx5_core_dev *dev) err_fw_tracer: mlx5_eq_table_destroy(dev); err_eq_table: + mlx5_irq_table_destroy(dev); +err_irq_table: mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); @@ -1134,6 +1142,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_fpga_device_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); + mlx5_irq_table_destroy(dev); mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 907515f3bfbb..14f1f63db3e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -155,6 +155,8 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev); int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_irq_table_create(struct mlx5_core_dev *dev); +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); From bfb49549ea7993f49c0374295d84a0c7772102a2 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:34 +0000 Subject: [PATCH 12/32] net/mlx5: Generalize IRQ interface to work with irq_table IRQ interface should operate within the irq_table context. It should be independent of any EQ data structure. The interface that will be exposed: init/clenup, create/destroy, attach/detach Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 38 +++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index daf9bc3155cc..80a436b5034a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -143,16 +143,22 @@ static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) return &irq_table->irq_info[vecidx]; } -static int mlx5_irq_attach_nb(struct mlx5_irq_info *irq, +static int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb) { - return atomic_notifier_chain_register(&irq->nh, nb); + struct mlx5_irq_info *irq_info; + + irq_info = &irq_table->irq_info[vecidx]; + return atomic_notifier_chain_register(&irq_info->nh, nb); } -static int mlx5_irq_detach_nb(struct mlx5_irq_info *irq, +static int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&irq->nh, nb); + struct mlx5_irq_info *irq_info; + + irq_info = &irq_table->irq_info[vecidx]; + return atomic_notifier_chain_unregister(&irq_info->nh, nb); } static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) @@ -465,7 +471,8 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; eq->irq_nb = param->nb; - err = mlx5_irq_attach_nb(mlx5_irq_get(dev, vecidx), param->nb); + err = mlx5_irq_attach_nb(dev->priv.eq_table->irq_table, vecidx, + param->nb); if (err) goto err_eq; @@ -481,7 +488,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, return 0; err_detach: - mlx5_irq_detach_nb(mlx5_irq_get(dev, vecidx), eq->irq_nb); + mlx5_irq_detach_nb(dev->priv.eq_table->irq_table, vecidx, eq->irq_nb); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -500,7 +507,8 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) mlx5_debug_eq_remove(dev, eq); - err = mlx5_irq_detach_nb(mlx5_irq_get(dev, eq->vecidx), eq->irq_nb); + err = mlx5_irq_detach_nb(dev->priv.eq_table->irq_table, + eq->vecidx, eq->irq_nb); if (err) mlx5_core_warn(eq->dev, "eq failed to detach from irq. err %d", err); @@ -1023,19 +1031,31 @@ unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_comp_vectors_count); +static struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) +{ + return irq_table->irq_info[vecidx].mask; +} + struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { int vecidx = vector + MLX5_EQ_VEC_COMP_BASE; - return dev->priv.eq_table->irq_table->irq_info[vecidx].mask; + return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, + vecidx); } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); #ifdef CONFIG_RFS_ACCEL +static struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table) +{ + return irq_table->rmap; +} + struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->irq_table->rmap; + return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table); } #endif From 256cf690af0668dd4e7c192648d2faf2e7e58788 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:37 +0000 Subject: [PATCH 13/32] net/mlx5: Move all IRQ logic to pci_irq.c Finalize IRQ separation and expose irq interface. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 327 ----------------- .../net/ethernet/mellanox/mlx5/core/lib/eq.h | 1 - .../ethernet/mellanox/mlx5/core/mlx5_core.h | 8 + .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 345 ++++++++++++++++++ 5 files changed, 354 insertions(+), 329 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 243368dc23db..cf8d2b74a2fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 80a436b5034a..0c72c122daef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -71,20 +71,6 @@ enum { static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); -struct mlx5_irq_info { - struct atomic_notifier_head nh; - cpumask_var_t mask; - char name[MLX5_MAX_IRQ_NAME]; -}; - -struct mlx5_irq_table { - struct mlx5_irq_info *irq_info; - int nvec; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif -}; - struct mlx5_eq_table { struct list_head comp_eqs_list; struct mlx5_eq_async pages_eq; @@ -114,157 +100,6 @@ struct mlx5_eq_table { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) -int mlx5_irq_table_init(struct mlx5_core_dev *dev) -{ - struct mlx5_irq_table *irq_table; - - irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); - if (!irq_table) - return -ENOMEM; - - dev->priv.irq_table = irq_table; - return 0; -} - -void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) -{ - kvfree(dev->priv.irq_table); -} - -static int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) -{ - return table->nvec - MLX5_EQ_VEC_COMP_BASE; -} - -static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) -{ - struct mlx5_irq_table *irq_table = dev->priv.irq_table; - - return &irq_table->irq_info[vecidx]; -} - -static int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, - struct notifier_block *nb) -{ - struct mlx5_irq_info *irq_info; - - irq_info = &irq_table->irq_info[vecidx]; - return atomic_notifier_chain_register(&irq_info->nh, nb); -} - -static int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, - struct notifier_block *nb) -{ - struct mlx5_irq_info *irq_info; - - irq_info = &irq_table->irq_info[vecidx]; - return atomic_notifier_chain_unregister(&irq_info->nh, nb); -} - -static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) -{ - atomic_notifier_call_chain(nh, 0, NULL); - return IRQ_HANDLED; -} - -static void irq_set_name(char *name, int vecidx) -{ - switch (vecidx) { - case MLX5_EQ_CMD_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_cmd_eq"); - break; - case MLX5_EQ_ASYNC_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async_eq"); - break; - case MLX5_EQ_PAGEREQ_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_pages_eq"); - break; - case MLX5_EQ_PFAULT_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_ib_page_fault_eq"); - break; - default: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", - vecidx - MLX5_EQ_VEC_COMP_BASE); - break; - } -} - -static int request_irqs(struct mlx5_core_dev *dev, int nvec) -{ - char name[MLX5_MAX_IRQ_NAME]; - int err; - int i; - - for (i = 0; i < nvec; i++) { - struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); - int irqn = pci_irq_vector(dev->pdev, i); - - irq_set_name(name, i); - ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); - snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, - "%s@pci:%s", name, pci_name(dev->pdev)); - err = request_irq(irqn, mlx5_irq_int_handler, 0, irq_info->name, - &irq_info->nh); - if (err) { - mlx5_core_err(dev, "Failed to request irq\n"); - goto err_request_irq; - } - } - return 0; - -err_request_irq: - for (; i >= 0; i--) { - struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); - int irqn = pci_irq_vector(dev->pdev, i); - - free_irq(irqn, &irq_info->nh); - } - return err; -} - -static void irq_clear_rmap(struct mlx5_core_dev *dev) -{ -#ifdef CONFIG_RFS_ACCEL - struct mlx5_irq_table *irq_table = dev->priv.irq_table; - - free_irq_cpu_rmap(irq_table->rmap); -#endif -} - -static int irq_set_rmap(struct mlx5_core_dev *mdev) -{ - int err = 0; -#ifdef CONFIG_RFS_ACCEL - struct mlx5_irq_table *irq_table = mdev->priv.irq_table; - int num_affinity_vec; - int vecidx; - - num_affinity_vec = mlx5_irq_get_num_comp(irq_table); - irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec); - if (!irq_table->rmap) { - err = -ENOMEM; - mlx5_core_err(mdev, "failed to allocate cpu_rmap. err %d", err); - goto err_out; - } - - vecidx = MLX5_EQ_VEC_COMP_BASE; - for (; vecidx < irq_table->nvec; vecidx++) { - err = irq_cpu_rmap_add(irq_table->rmap, - pci_irq_vector(mdev->pdev, vecidx)); - if (err) { - mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d", err); - goto err_irq_cpu_rmap_add; - } - } - return 0; - -err_irq_cpu_rmap_add: - irq_clear_rmap(mdev); -err_out: -#endif - return err; -} - static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -868,75 +703,6 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); -/* Completion EQs */ - -static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_priv *priv = &mdev->priv; - struct mlx5_irq_info *irq_info; - int irq; - - irq_info = mlx5_irq_get(mdev, vecidx); - irq = pci_irq_vector(mdev->pdev, vecidx); - - if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - irq_info->mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, irq_info->mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_irq_info *irq_info; - int irq; - - irq_info = mlx5_irq_get(mdev, vecidx); - irq = pci_irq_vector(mdev->pdev, vecidx); - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(irq_info->mask); -} - -static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) -{ - int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); - int err; - int i; - - for (i = 0; i < nvec; i++) { - err = set_comp_irq_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - clear_comp_irq_affinity_hint(mdev, i); - - return err; -} - -static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) -{ - int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); - int i; - - for (i = 0; i < nvec; i++) - clear_comp_irq_affinity_hint(mdev, i); -} - static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -1031,12 +797,6 @@ unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_comp_vectors_count); -static struct cpumask * -mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) -{ - return irq_table->irq_info[vecidx].mask; -} - struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { @@ -1048,11 +808,6 @@ mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); #ifdef CONFIG_RFS_ACCEL -static struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table) -{ - return irq_table->rmap; -} - struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table); @@ -1082,88 +837,6 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) mutex_unlock(&table->lock); } -static void unrequest_irqs(struct mlx5_core_dev *dev) -{ - struct mlx5_irq_table *table = dev->priv.irq_table; - int i; - - for (i = 0; i < table->nvec; i++) - free_irq(pci_irq_vector(dev->pdev, i), - &mlx5_irq_get(dev, i)->nh); -} - -int mlx5_irq_table_create(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_irq_table *table = priv->irq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? - MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); - int nvec; - int err; - - nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; - nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) - return -ENOMEM; - - table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); - if (!table->irq_info) - return -ENOMEM; - - nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, - nvec, PCI_IRQ_MSIX); - if (nvec < 0) { - err = nvec; - goto err_free_irq_info; - } - - table->nvec = nvec; - - err = irq_set_rmap(dev); - if (err) - goto err_set_rmap; - - err = request_irqs(dev, nvec); - if (err) - goto err_request_irqs; - - err = set_comp_irq_affinity_hints(dev); - if (err) - goto err_set_affinity; - - return 0; - -err_set_affinity: - unrequest_irqs(dev); -err_request_irqs: - irq_clear_rmap(dev); -err_set_rmap: - pci_free_irq_vectors(dev->pdev); -err_free_irq_info: - kfree(table->irq_info); - return err; -} - -void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) -{ - struct mlx5_irq_table *table = dev->priv.irq_table; - int i; - - /* free_irq requires that affinity and rmap will be cleared - * before calling it. This is why there is asymmetry with set_rmap - * which should be called after alloc_irq but before request_irq. - */ - irq_clear_rmap(dev); - clear_comp_irqs_affinity_hints(dev); - for (i = 0; i < table->nvec; i++) - free_irq(pci_irq_vector(dev->pdev, i), - &mlx5_irq_get(dev, i)->nh); - pci_free_irq_vectors(dev->pdev); - kfree(table->irq_info); -} - int mlx5_eq_table_create(struct mlx5_core_dev *dev) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index adbc228bd55d..3836c39b2900 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -7,7 +7,6 @@ #include #include -#define MLX5_MAX_IRQ_NAME (32) #define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) struct mlx5_eq_tasklet { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 14f1f63db3e3..e0f6783a5f6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -157,6 +157,14 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_irq_table_create(struct mlx5_core_dev *dev); void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx); +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table); +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table); int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c new file mode 100644 index 000000000000..75408639d150 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -0,0 +1,345 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include +#include +#include +#include "mlx5_core.h" +#ifdef CONFIG_RFS_ACCEL +#include +#endif + +#define MLX5_MAX_IRQ_NAME (32) + +struct mlx5_irq_info { + struct atomic_notifier_head nh; + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + +struct mlx5_irq_table { + struct mlx5_irq_info *irq_info; + int nvec; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.irq_table); +} + +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) +{ + return table->nvec - MLX5_EQ_VEC_COMP_BASE; +} + +static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +{ + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + return &irq_table->irq_info[vecidx]; +} + +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq_info *irq_info; + + irq_info = &irq_table->irq_info[vecidx]; + return atomic_notifier_chain_register(&irq_info->nh, nb); +} + +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq_info *irq_info; + + irq_info = &irq_table->irq_info[vecidx]; + return atomic_notifier_chain_unregister(&irq_info->nh, nb); +} + +static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) +{ + atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + +static void irq_set_name(char *name, int vecidx) +{ + switch (vecidx) { + case MLX5_EQ_CMD_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_cmd_eq"); + break; + case MLX5_EQ_ASYNC_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async_eq"); + break; + case MLX5_EQ_PAGEREQ_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_pages_eq"); + break; + case MLX5_EQ_PFAULT_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_ib_page_fault_eq"); + break; + default: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_EQ_VEC_COMP_BASE); + break; + } +} + +static int request_irqs(struct mlx5_core_dev *dev, int nvec) +{ + char name[MLX5_MAX_IRQ_NAME]; + int err; + int i; + + for (i = 0; i < nvec; i++) { + struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + irq_set_name(name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); + snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq_info->name, + &irq_info->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq\n"); + goto err_request_irq; + } + } + return 0; + +err_request_irq: + for (; i >= 0; i--) { + struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + free_irq(irqn, &irq_info->nh); + } + return err; +} + +static void irq_clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + free_irq_cpu_rmap(irq_table->rmap); +#endif +} + +static int irq_set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = mdev->priv.irq_table; + int num_affinity_vec; + int vecidx; + + num_affinity_vec = mlx5_irq_get_num_comp(irq_table); + irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec); + if (!irq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err); + goto err_out; + } + + vecidx = MLX5_EQ_VEC_COMP_BASE; + for (; vecidx < irq_table->nvec; vecidx++) { + err = irq_cpu_rmap_add(irq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d", + err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + irq_clear_rmap(mdev); +err_out: +#endif + return err; +} + +/* Completion IRQ vectors */ + +static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + struct mlx5_irq_info *irq_info; + int irq; + + irq_info = mlx5_irq_get(mdev, vecidx); + irq = pci_irq_vector(mdev->pdev, vecidx); + if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node), + irq_info->mask); + + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irq, irq_info->mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", + irq); + + return 0; +} + +static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + struct mlx5_irq_info *irq_info; + int irq; + + irq_info = mlx5_irq_get(mdev, vecidx); + irq = pci_irq_vector(mdev->pdev, vecidx); + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(irq_info->mask); +} + +static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int err; + int i; + + for (i = 0; i < nvec; i++) { + err = set_comp_irq_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + clear_comp_irq_affinity_hint(mdev, i); + + return err; +} + +static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int i; + + for (i = 0; i < nvec; i++) + clear_comp_irq_affinity_hint(mdev, i); +} + +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) +{ + return irq_table->irq_info[vecidx].mask; +} + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table) +{ + return irq_table->rmap; +} +#endif + +static void unrequest_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); +} + +int mlx5_irq_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_irq_table *table = priv->irq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_EQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_EQ_VEC_COMP_BASE) + return -ENOMEM; + + table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); + if (!table->irq_info) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, + nvec, PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq_info; + } + + table->nvec = nvec; + + err = irq_set_rmap(dev); + if (err) + goto err_set_rmap; + + err = request_irqs(dev, nvec); + if (err) + goto err_request_irqs; + + err = set_comp_irq_affinity_hints(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); + goto err_set_affinity; + } + + return 0; + +err_set_affinity: + unrequest_irqs(dev); +err_request_irqs: + irq_clear_rmap(dev); +err_set_rmap: + pci_free_irq_vectors(dev->pdev); +err_free_irq_info: + kfree(table->irq_info); + return err; +} + +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + /* free_irq requires that affinity and rmap will be cleared + * before calling it. This is why there is asymmetry with set_rmap + * which should be called after alloc_irq but before request_irq. + */ + irq_clear_rmap(dev); + clear_comp_irqs_affinity_hints(dev); + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); + pci_free_irq_vectors(dev->pdev); + kfree(table->irq_info); +} + From cf49f41d29467ccec16b12f77475cc217132c572 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:39 +0000 Subject: [PATCH 14/32] net/mlx5: Rename mlx5_irq_info to mlx5_irq struct mlx5_irq_info is an active object and not just info. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 77 +++++++++---------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 75408639d150..fec861f4fefe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -12,14 +12,14 @@ #define MLX5_MAX_IRQ_NAME (32) -struct mlx5_irq_info { +struct mlx5_irq { struct atomic_notifier_head nh; cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; }; struct mlx5_irq_table { - struct mlx5_irq_info *irq_info; + struct mlx5_irq *irq; int nvec; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; @@ -48,29 +48,29 @@ int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) return table->nvec - MLX5_EQ_VEC_COMP_BASE; } -static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) { struct mlx5_irq_table *irq_table = dev->priv.irq_table; - return &irq_table->irq_info[vecidx]; + return &irq_table->irq[vecidx]; } int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb) { - struct mlx5_irq_info *irq_info; + struct mlx5_irq *irq; - irq_info = &irq_table->irq_info[vecidx]; - return atomic_notifier_chain_register(&irq_info->nh, nb); + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_register(&irq->nh, nb); } int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb) { - struct mlx5_irq_info *irq_info; + struct mlx5_irq *irq; - irq_info = &irq_table->irq_info[vecidx]; - return atomic_notifier_chain_unregister(&irq_info->nh, nb); + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_unregister(&irq->nh, nb); } static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) @@ -108,15 +108,15 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec) int i; for (i = 0; i < nvec; i++) { - struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); + struct mlx5_irq *irq = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); irq_set_name(name, i); - ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); - snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, + ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); + snprintf(irq->name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); - err = request_irq(irqn, mlx5_irq_int_handler, 0, irq_info->name, - &irq_info->nh); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name, + &irq->nh); if (err) { mlx5_core_err(dev, "Failed to request irq\n"); goto err_request_irq; @@ -126,10 +126,10 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec) err_request_irq: for (; i >= 0; i--) { - struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); + struct mlx5_irq *irq = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); - free_irq(irqn, &irq_info->nh); + free_irq(irqn, &irq->nh); } return err; } @@ -183,23 +183,22 @@ static int irq_set_rmap(struct mlx5_core_dev *mdev) static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_irq_info *irq_info; - int irq; + struct mlx5_irq *irq; + int irqn; - irq_info = mlx5_irq_get(mdev, vecidx); - irq = pci_irq_vector(mdev->pdev, vecidx); - if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); return -ENOMEM; } cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node), - irq_info->mask); - + irq->mask); if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, irq_info->mask)) + irq_set_affinity_hint(irqn, irq->mask)) mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", - irq); + irqn); return 0; } @@ -207,13 +206,13 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_irq_info *irq_info; - int irq; + struct mlx5_irq *irq; + int irqn; - irq_info = mlx5_irq_get(mdev, vecidx); - irq = pci_irq_vector(mdev->pdev, vecidx); - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(irq_info->mask); + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + irq_set_affinity_hint(irqn, NULL); + free_cpumask_var(irq->mask); } static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) @@ -249,7 +248,7 @@ static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) struct cpumask * mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) { - return irq_table->irq_info[vecidx].mask; + return irq_table->irq[vecidx].mask; } #ifdef CONFIG_RFS_ACCEL @@ -285,15 +284,15 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev) if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; - table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); - if (!table->irq_info) + table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL); + if (!table->irq) return -ENOMEM; nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, nvec, PCI_IRQ_MSIX); if (nvec < 0) { err = nvec; - goto err_free_irq_info; + goto err_free_irq; } table->nvec = nvec; @@ -320,8 +319,8 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev) irq_clear_rmap(dev); err_set_rmap: pci_free_irq_vectors(dev->pdev); -err_free_irq_info: - kfree(table->irq_info); +err_free_irq: + kfree(table->irq); return err; } @@ -340,6 +339,6 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) free_irq(pci_irq_vector(dev->pdev, i), &mlx5_irq_get(dev, i)->nh); pci_free_irq_vectors(dev->pdev); - kfree(table->irq_info); + kfree(table->irq); } From 81bfa206032a67f0700459a64a5493c246629604 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 10 Jun 2019 23:38:41 +0000 Subject: [PATCH 15/32] net/mlx5: Use a single IRQ for all async EQs The patch modifies the IRQ allocation so that all async EQs are assigned to the same IRQ resulting in more available IRQs for completion EQs. The changes are using the support for IRQ sharing and EQ polling budget that was introduced in previous patches so when the shared interrupt is triggered, the kernel will serially call the handler of each of the sharing EQs with a certain budget of EQEs to poll in order to prevent starvation. Signed-off-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/odp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 19 +++++----- .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 38 +++++++------------ include/linux/mlx5/eq.h | 14 +------ 4 files changed, 27 insertions(+), 46 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 7ce7c5bfe685..693a0e225093 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1557,7 +1557,7 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PFAULT_IDX, + .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, .nent = MLX5_IB_NUM_PF_EQE, .nb = &eq->irq_nb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 0c72c122daef..0f5846a34928 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -250,7 +250,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - u8 vecidx = param->index; + u8 vecidx = param->irq_index; __be64 *pas; void *eqc; int inlen; @@ -435,8 +435,9 @@ static int create_async_eq(struct mlx5_core_dev *dev, int err; mutex_lock(&eq_table->lock); - if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { - err = -ENOSPC; + /* Async EQs must share irq index 0 */ + if (param->irq_index != 0) { + err = -EINVAL; goto unlock; } @@ -540,7 +541,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_CMD_IDX, + .irq_index = 0, .mask = 1ull << MLX5_EVENT_TYPE_CMD, .nent = MLX5_NUM_CMD_EQE, .nb = &table->cmd_eq.irq_nb, @@ -555,7 +556,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_ASYNC_IDX, + .irq_index = 0, .mask = gather_async_events_mask(dev), .nent = MLX5_NUM_ASYNC_EQE, .nb = &table->async_eq.irq_nb, @@ -568,7 +569,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PAGEREQ_IDX, + .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, .nent = /* TODO: sriov max_vf + */ 1, .nb = &table->pages_eq.irq_nb, @@ -731,7 +732,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; for (i = 0; i < ncomp_eqs; i++) { - int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + int vecidx = i + MLX5_IRQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; eq = kzalloc(sizeof(*eq), GFP_KERNEL); @@ -748,7 +749,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .index = vecidx, + .irq_index = vecidx, .mask = 0, .nent = nent, .nb = &eq->irq_nb, @@ -800,7 +801,7 @@ EXPORT_SYMBOL(mlx5_comp_vectors_count); struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - int vecidx = vector + MLX5_EQ_VEC_COMP_BASE; + int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE; return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, vecidx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index fec861f4fefe..373981a659c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -45,7 +45,7 @@ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) { - return table->nvec - MLX5_EQ_VEC_COMP_BASE; + return table->nvec - MLX5_IRQ_VEC_COMP_BASE; } static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) @@ -81,24 +81,14 @@ static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) static void irq_set_name(char *name, int vecidx) { - switch (vecidx) { - case MLX5_EQ_CMD_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_cmd_eq"); - break; - case MLX5_EQ_ASYNC_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async_eq"); - break; - case MLX5_EQ_PAGEREQ_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_pages_eq"); - break; - case MLX5_EQ_PFAULT_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_ib_page_fault_eq"); - break; - default: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", - vecidx - MLX5_EQ_VEC_COMP_BASE); - break; + if (vecidx == 0) { + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async"); + return; } + + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_IRQ_VEC_COMP_BASE); + return; } static int request_irqs(struct mlx5_core_dev *dev, int nvec) @@ -159,7 +149,7 @@ static int irq_set_rmap(struct mlx5_core_dev *mdev) goto err_out; } - vecidx = MLX5_EQ_VEC_COMP_BASE; + vecidx = MLX5_IRQ_VEC_COMP_BASE; for (; vecidx < irq_table->nvec; vecidx++) { err = irq_cpu_rmap_add(irq_table->rmap, pci_irq_vector(mdev->pdev, vecidx)); @@ -182,7 +172,7 @@ static int irq_set_rmap(struct mlx5_core_dev *mdev) static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; struct mlx5_irq *irq; int irqn; @@ -205,7 +195,7 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; struct mlx5_irq *irq; int irqn; @@ -279,16 +269,16 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev) int err; nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; + MLX5_IRQ_VEC_COMP_BASE; nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) + if (nvec <= MLX5_IRQ_VEC_COMP_BASE) return -ENOMEM; table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL); if (!table->irq) return -ENOMEM; - nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1, nvec, PCI_IRQ_MSIX); if (nvec < 0) { err = nvec; diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 73ab658af764..4a94e04eff0a 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -4,17 +4,7 @@ #ifndef MLX5_CORE_EQ_H #define MLX5_CORE_EQ_H -enum { - MLX5_EQ_PAGEREQ_IDX = 0, - MLX5_EQ_CMD_IDX = 1, - MLX5_EQ_ASYNC_IDX = 2, - /* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */ - MLX5_EQ_PFAULT_IDX = 3, - MLX5_EQ_MAX_ASYNC_EQS, - /* completion eqs vector indices start here */ - MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS, -}; - +#define MLX5_IRQ_VEC_COMP_BASE 1 #define MLX5_NUM_CMD_EQE (32) #define MLX5_NUM_ASYNC_EQE (0x1000) #define MLX5_NUM_SPARE_EQE (0x80) @@ -23,7 +13,7 @@ struct mlx5_eq; struct mlx5_core_dev; struct mlx5_eq_param { - u8 index; + u8 irq_index; int nent; u64 mask; struct notifier_block *nb; From 1f8a7bee27e63d7c5287719049941e285e54d370 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:42 +0000 Subject: [PATCH 16/32] net/mlx5: Add EQ enable/disable API Previously, EQ joined the chain notifier on creation. This forced the caller to be ready to handle events before creating the EQ through eq_create_generic interface. To help the caller control when the created EQ will be attached to the IRQ, add enable/disable API. Signed-off-by: Yuval Avnery Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/odp.c | 9 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 105 +++++++++++++----- .../net/ethernet/mellanox/mlx5/core/lib/eq.h | 1 - include/linux/mlx5/eq.h | 5 +- 4 files changed, 88 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 693a0e225093..12ccee1eb047 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1560,15 +1560,21 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, .nent = MLX5_IB_NUM_PF_EQE, - .nb = &eq->irq_nb, }; eq->core = mlx5_eq_create_generic(dev->mdev, ¶m); if (IS_ERR(eq->core)) { err = PTR_ERR(eq->core); goto err_wq; } + err = mlx5_eq_enable(dev->mdev, eq->core, &eq->irq_nb); + if (err) { + mlx5_ib_err(dev, "failed to enable odp EQ %d\n", err); + goto err_eq; + } return 0; +err_eq: + mlx5_eq_destroy_generic(dev->mdev, eq->core); err_wq: destroy_workqueue(eq->wq); err_mempool: @@ -1581,6 +1587,7 @@ mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) { int err; + mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb); err = mlx5_eq_destroy_generic(dev->mdev, eq->core); cancel_work_sync(&eq->work); destroy_workqueue(eq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 0f5846a34928..58fff2f39b38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -304,27 +304,14 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - eq->irq_nb = param->nb; - - err = mlx5_irq_attach_nb(dev->priv.eq_table->irq_table, vecidx, - param->nb); - if (err) - goto err_eq; err = mlx5_debug_eq_add(dev, eq); if (err) - goto err_detach; - - /* EQs are created in ARMED state - */ - eq_update_ci(eq, 1); + goto err_eq; kvfree(in); return 0; -err_detach: - mlx5_irq_detach_nb(dev->priv.eq_table->irq_table, vecidx, eq->irq_nb); - err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -336,17 +323,49 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, return err; } +/** + * mlx5_eq_enable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to enable + * @nb - notifier call block + * mlx5_eq_enable - must be called after EQ is created in device. + */ +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb); + if (!err) + eq_update_ci(eq, 1); + + return err; +} +EXPORT_SYMBOL(mlx5_eq_enable); + +/** + * mlx5_eq_disable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to disable + * @nb - notifier call block + * mlx5_eq_disable - must be called before EQ is destroyed. + */ +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb); +} +EXPORT_SYMBOL(mlx5_eq_disable); + static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { int err; mlx5_debug_eq_remove(dev, eq); - err = mlx5_irq_detach_nb(dev->priv.eq_table->irq_table, - eq->vecidx, eq->irq_nb); - if (err) - mlx5_core_warn(eq->dev, "eq failed to detach from irq. err %d", - err); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", @@ -544,14 +563,17 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .irq_index = 0, .mask = 1ull << MLX5_EVENT_TYPE_CMD, .nent = MLX5_NUM_CMD_EQE, - .nb = &table->cmd_eq.irq_nb, }; err = create_async_eq(dev, &table->cmd_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; } - + err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + goto err1; + } mlx5_cmd_use_events(dev); table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; @@ -559,12 +581,17 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .irq_index = 0, .mask = gather_async_events_mask(dev), .nent = MLX5_NUM_ASYNC_EQE, - .nb = &table->async_eq.irq_nb, }; err = create_async_eq(dev, &table->async_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); - goto err1; + goto err2; + } + err = mlx5_eq_enable(dev, &table->async_eq.core, + &table->async_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); + goto err3; } table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; @@ -572,21 +599,31 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, .nent = /* TODO: sriov max_vf + */ 1, - .nb = &table->pages_eq.irq_nb, }; err = create_async_eq(dev, &table->pages_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err2; + goto err4; + } + err = mlx5_eq_enable(dev, &table->pages_eq.core, + &table->pages_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); + goto err5; } return err; -err2: +err5: + destroy_async_eq(dev, &table->pages_eq.core); +err4: + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); +err3: destroy_async_eq(dev, &table->async_eq.core); - -err1: +err2: mlx5_cmd_use_polling(dev); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); +err1: destroy_async_eq(dev, &table->cmd_eq.core); err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); @@ -598,11 +635,13 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; + mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); err = destroy_async_eq(dev, &table->pages_eq.core); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); err = destroy_async_eq(dev, &table->async_eq.core); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", @@ -610,6 +649,7 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_polling(dev); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); err = destroy_async_eq(dev, &table->cmd_eq.core); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", @@ -711,6 +751,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); if (destroy_unmap_eq(dev, &eq->core)) mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", eq->core.eqn); @@ -752,13 +793,19 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) .irq_index = vecidx, .mask = 0, .nent = nent, - .nb = &eq->irq_nb, }; err = create_map_eq(dev, &eq->core, ¶m); if (err) { kfree(eq); goto clean; } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + destroy_unmap_eq(dev, &eq->core); + kfree(eq); + goto clean; + } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ list_add_tail(&eq->list, &table->comp_eqs_list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 3836c39b2900..24bd991a727e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -33,7 +33,6 @@ struct mlx5_eq { u8 eqn; int nent; struct mlx5_rsc_debug *dbg; - struct notifier_block *irq_nb; /* For destroy only */ }; struct mlx5_eq_async { diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 4a94e04eff0a..70e16dcfb4c4 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -16,13 +16,16 @@ struct mlx5_eq_param { u8 irq_index; int nent; u64 mask; - struct notifier_block *nb; }; struct mlx5_eq * mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param); int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb); +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb); struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc); void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm); From 98fdbea550378e0153092bce21261df86a8ccc57 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 12 Jun 2019 15:20:11 +0300 Subject: [PATCH 17/32] net/mlx5: Declare more strictly devlink encap mode Devlink has UAPI declaration for encap mode, so there is no need to be loose on the data get/set by drivers. Update call sites to use enum devlink_eswitch_encap_mode instead of plain u8. Suggested-by: Parav Pandit Signed-off-by: Leon Romanovsky Acked-by: Jiri Pirko Reviewed-by: Parav Pandit Reviewed-by: Petr Vorel --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 8 +++++--- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 ++++-- include/net/devlink.h | 6 ++++-- net/core/devlink.c | 6 ++++-- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index e03811be771d..8b9f2cf58e91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -176,7 +176,7 @@ struct mlx5_esw_offload { const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; u64 num_flows; - u8 encap; + enum devlink_eswitch_encap_mode encap; }; /* E-Switch MC FDB table hash node */ @@ -357,9 +357,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack); -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1638e4cdeb16..17abb98b48af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2160,7 +2160,8 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) return 0; } -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); @@ -2209,7 +2210,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return err; } -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; diff --git a/include/net/devlink.h b/include/net/devlink.h index 1c4adfb4195a..7a34fc586def 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -530,8 +530,10 @@ struct devlink_ops { int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode, struct netlink_ext_ack *extack); - int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode); - int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode, + int (*eswitch_encap_mode_get)(struct devlink *devlink, + enum devlink_eswitch_encap_mode *p_encap_mode); + int (*eswitch_encap_mode_set)(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap_mode, struct netlink_ext_ack *extack); int (*info_get)(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack); diff --git a/net/core/devlink.c b/net/core/devlink.c index d43bc52b8840..47ae69363b07 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1552,7 +1552,8 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, u32 seq, int flags) { const struct devlink_ops *ops = devlink->ops; - u8 inline_mode, encap_mode; + enum devlink_eswitch_encap_mode encap_mode; + u8 inline_mode; void *hdr; int err = 0; u16 mode; @@ -1628,7 +1629,8 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; - u8 inline_mode, encap_mode; + enum devlink_eswitch_encap_mode encap_mode; + u8 inline_mode; int err = 0; u16 mode; From 82b11f071936a11094e1c44730030cd3d894e0b4 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 12 Jun 2019 15:20:12 +0300 Subject: [PATCH 18/32] net/mlx5: Expose eswitch encap mode Add API to get the current Eswitch encap mode. It will be used in downstream patches to check if flow table can be created with encap support or not. Signed-off-by: Maor Gottlieb Reviewed-by: Petr Vorel Signed-off-by: Leon Romanovsky Reviewed-by: Parav Pandit --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 11 +++++++++++ include/linux/mlx5/eswitch.h | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a4df109fbeb7..12010f85fa35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2457,6 +2457,17 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return ESW_ALLOWED(esw) ? esw->offloads.encap : + DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); + bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { if ((dev0->priv.eswitch->mode == SRIOV_NONE && diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index d81ee4df181c..174eec0871d9 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -7,6 +7,7 @@ #define _MLX5_ESWITCH_ #include +#include #define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) @@ -62,4 +63,15 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn); + +#ifdef CONFIG_MLX5_ESWITCH +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); +#else /* CONFIG_MLX5_ESWITCH */ +static inline enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + return DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +#endif /* CONFIG_MLX5_ESWITCH */ #endif From 792c4e9d0bbb53b34bf1c07c2ef25609d746c57d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 20 Jun 2019 07:03:47 +0000 Subject: [PATCH 19/32] net/mlx5: Convert mkey_table to XArray The lock protecting the data structure does not need to be an rwlock. The only read access to the lock is in an error path, and if that's limiting your scalability, you have bigger performance problems. Eliminate mlx5_mkey_table in favour of using the xarray directly. reg_mr_callback must use GFP_ATOMIC for allocating XArray nodes as it may be called in interrupt context. This also fixes a minor bug where SRCU locking was being used on the radix tree read side, when RCU was needed too. Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/cq.c | 8 +++--- drivers/infiniband/hw/mlx5/devx.c | 18 +++---------- drivers/infiniband/hw/mlx5/mr.c | 10 ++++---- drivers/infiniband/hw/mlx5/odp.c | 10 ++++---- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 27 ++++++++------------ include/linux/mlx5/driver.h | 13 ++-------- include/linux/mlx5/qp.h | 5 ---- 7 files changed, 31 insertions(+), 60 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2e2e65f00257..0220736b073e 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -522,9 +522,9 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, case MLX5_CQE_SIG_ERR: sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; - read_lock(&dev->mdev->priv.mkey_table.lock); - mmkey = __mlx5_mr_lookup(dev->mdev, - mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + xa_lock(&dev->mdev->priv.mkey_table); + mmkey = xa_load(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; @@ -537,7 +537,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, mr->sig->err_item.expected, mr->sig->err_item.actual); - read_unlock(&dev->mdev->priv.mkey_table.lock); + xa_unlock(&dev->mdev->priv.mkey_table); goto repoll; } diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 80b42d069328..931f587dfb8f 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1043,13 +1043,10 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, struct mlx5_ib_dev *dev, void *in, void *out) { - struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr; - unsigned long flags; struct mlx5_core_mkey *mkey; void *mkc; u8 key; - int err; mkey = &devx_mr->mmkey; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); @@ -1062,11 +1059,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); - write_lock_irqsave(&table->lock, flags); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), - mkey); - write_unlock_irqrestore(&table->lock, flags); - return err; + return xa_err(xa_store(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL)); } static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, @@ -1117,12 +1111,8 @@ static void devx_free_indirect_mkey(struct rcu_head *rcu) */ static void devx_cleanup_mkey(struct devx_obj *obj) { - struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table; - unsigned long flags; - - write_lock_irqsave(&table->lock, flags); - radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key)); - write_unlock_irqrestore(&table->lock, flags); + xa_erase(&obj->mdev->priv.mkey_table, + mlx5_base_mkey(obj->devx_mr.mmkey.key)); } static int devx_obj_cleanup(struct ib_uobject *uobject, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 5f09699fab98..83b452d977d4 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -130,7 +130,7 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; - struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; + struct xarray *mkeys = &dev->mdev->priv.mkey_table; int err; spin_lock_irqsave(&ent->lock, flags); @@ -158,12 +158,12 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) ent->size++; spin_unlock_irqrestore(&ent->lock, flags); - write_lock_irqsave(&table->lock, flags); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey); + xa_lock_irqsave(mkeys, flags); + err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_ATOMIC)); + xa_unlock_irqrestore(mkeys, flags); if (err) pr_err("Error inserting to mkey tree. 0x%x\n", -err); - write_unlock_irqrestore(&table->lock, flags); if (!completion_done(&ent->compl)) complete(&ent->compl); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 12ccee1eb047..c594489eb2d7 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -768,7 +768,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, bcnt -= *bytes_committed; next_mr: - mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key)); + mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key)); if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; @@ -1686,8 +1686,8 @@ static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; - mmkey = __mlx5_mr_lookup(dev->mdev, - mlx5_base_mkey(sg_list[i].lkey)); + mmkey = xa_load(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(sg_list[i].lkey)); mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); atomic_dec(&mr->num_pending_prefetch); } @@ -1706,8 +1706,8 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd, struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; - mmkey = __mlx5_mr_lookup(dev->mdev, - mlx5_base_mkey(sg_list[i].lkey)); + mmkey = xa_load(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(sg_list[i].lkey)); if (!mmkey || mmkey->key != sg_list[i].lkey) { ret = false; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index ea744d8466ea..9231b39d18b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -38,15 +38,12 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; - - memset(table, 0, sizeof(*table)); - rwlock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ); } void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { + WARN_ON(!xa_empty(&dev->priv.mkey_table)); } int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, @@ -56,8 +53,8 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_async_cbk_t callback, struct mlx5_async_work *context) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; u32 mkey_index; void *mkc; int err; @@ -88,12 +85,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", mkey_index, key, mkey->key); - /* connect to mkey tree */ - write_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); - write_unlock_irq(&table->lock); + err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey, + GFP_KERNEL)); if (err) { - mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n", mlx5_base_mkey(mkey->key), err); mlx5_core_destroy_mkey(dev, mkey); } @@ -114,17 +109,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; struct mlx5_core_mkey *deleted_mkey; unsigned long flags; - write_lock_irqsave(&table->lock, flags); - deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); - write_unlock_irqrestore(&table->lock, flags); + xa_lock_irqsave(mkeys, flags); + deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); + xa_unlock_irqrestore(mkeys, flags); if (!deleted_mkey) { - mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n", mlx5_base_mkey(mkey->key)); return -ENOENT; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d8ab633406c2..87f77ded78d4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include @@ -452,13 +452,6 @@ struct mlx5_qp_table { struct radix_tree_root tree; }; -struct mlx5_mkey_table { - /* protect radix tree - */ - rwlock_t lock; - struct radix_tree_root tree; -}; - struct mlx5_vf_context { int enabled; u64 port_guid; @@ -546,9 +539,7 @@ struct mlx5_priv { struct dentry *cmdif_debugfs; /* end: qp staff */ - /* start: mkey staff */ - struct mlx5_mkey_table mkey_table; - /* end: mkey staff */ + struct xarray mkey_table; /* start: alloc staff */ /* protect buffer alocation according to numa node */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 3ba4edbd17a6..d1f353c64797 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -551,11 +551,6 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); } -static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) -{ - return radix_tree_lookup(&dev->priv.mkey_table.tree, key); -} - int mlx5_core_create_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *qp, u32 *in, int inlen, From 65c0f2c1663649217455a73d48b1c303f133180a Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:47:50 +0000 Subject: [PATCH 20/32] net/mlx5: Introduce vport metadata matching bits and enum constants When a dual-port VHCA sends a RoCE packet on its non-native port, and the packet arrives to its affiliated vport FDB, a mismatch might occur on the rules that match the packet source vport. So we replace the match on source port with the match on metadata that was configured in ingress ACL, and that metadata will be passed further also to the NIC RX table of the eswitch manager. Introduce vport metadata matching bits and enum constants as a pre-step towards metadata matching. o metadata type C registers in the misc parameters 2 fields. o esw_uplink_ingress_acl bit in esw cap. If it set, the device supports ingress ACL for the uplink vport. o fdb_to_vport_reg_* bits in flow table cap and esw vport context, to support propagating the metadata to the nic rx through the loopback path. o flow_source in flow context, to indicate the known origin of packets. o enum constants, to support the above bits. Signed-off-by: Jianbo Liu Reviewed-by: Eli Britstein Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 56 ++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e3c154b573a2..d4409654f760 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -528,7 +528,21 @@ struct mlx5_ifc_fte_match_set_misc2_bits { struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_udp; - u8 reserved_at_80[0x100]; + u8 metadata_reg_c_7[0x20]; + + u8 metadata_reg_c_6[0x20]; + + u8 metadata_reg_c_5[0x20]; + + u8 metadata_reg_c_4[0x20]; + + u8 metadata_reg_c_3[0x20]; + + u8 metadata_reg_c_2[0x20]; + + u8 metadata_reg_c_1[0x20]; + + u8 metadata_reg_c_0[0x20]; u8 metadata_reg_a[0x20]; @@ -636,8 +650,22 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 reserved_at_e00[0x7200]; }; +enum { + MLX5_FDB_TO_VPORT_REG_C_0 = 0x01, + MLX5_FDB_TO_VPORT_REG_C_1 = 0x02, + MLX5_FDB_TO_VPORT_REG_C_2 = 0x04, + MLX5_FDB_TO_VPORT_REG_C_3 = 0x08, + MLX5_FDB_TO_VPORT_REG_C_4 = 0x10, + MLX5_FDB_TO_VPORT_REG_C_5 = 0x20, + MLX5_FDB_TO_VPORT_REG_C_6 = 0x40, + MLX5_FDB_TO_VPORT_REG_C_7 = 0x80, +}; + struct mlx5_ifc_flow_table_eswitch_cap_bits { - u8 reserved_at_0[0x1a]; + u8 fdb_to_vport_reg_c_id[0x8]; + u8 reserved_at_8[0xf]; + u8 flow_source[0x1]; + u8 reserved_at_18[0x2]; u8 multi_fdb_encap[0x1]; u8 reserved_at_1b[0x1]; u8 fdb_multi_path_to_table[0x1]; @@ -665,7 +693,9 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x14]; + u8 reserved_at_5[0x3]; + u8 esw_uplink_ingress_acl[0x1]; + u8 reserved_at_9[0x10]; u8 esw_functions_changed[0x1]; u8 reserved_at_1a[0x1]; u8 ecpf_vport_exists[0x1]; @@ -2555,6 +2585,12 @@ enum { MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800, }; +enum { + MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT = 0x0, + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK = 0x1, + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT = 0x2, +}; + struct mlx5_ifc_vlan_bits { u8 ethtype[0x10]; u8 prio[0x3]; @@ -2574,7 +2610,9 @@ struct mlx5_ifc_flow_context_bits { u8 action[0x10]; u8 extended_destination[0x1]; - u8 reserved_at_80[0x7]; + u8 reserved_at_81[0x1]; + u8 flow_source[0x2]; + u8 reserved_at_84[0x4]; u8 destination_list_size[0x18]; u8 reserved_at_a0[0x8]; @@ -3099,12 +3137,14 @@ struct mlx5_ifc_hca_vport_context_bits { }; struct mlx5_ifc_esw_vport_context_bits { - u8 reserved_at_0[0x3]; + u8 fdb_to_vport_reg_c[0x1]; + u8 reserved_at_1[0x2]; u8 vport_svlan_strip[0x1]; u8 vport_cvlan_strip[0x1]; u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert[0x2]; - u8 reserved_at_8[0x18]; + u8 fdb_to_vport_reg_c_id[0x8]; + u8 reserved_at_10[0x10]; u8 reserved_at_20[0x20]; @@ -4985,7 +5025,8 @@ struct mlx5_ifc_modify_esw_vport_context_out_bits { }; struct mlx5_ifc_esw_vport_context_fields_select_bits { - u8 reserved_at_0[0x1c]; + u8 reserved_at_0[0x1b]; + u8 fdb_to_vport_reg_c_id[0x1]; u8 vport_cvlan_insert[0x1]; u8 vport_svlan_insert[0x1]; u8 vport_cvlan_strip[0x1]; @@ -5182,6 +5223,7 @@ enum { MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16, MLX5_ACTION_IN_FIELD_OUT_FIRST_VID = 0x17, MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_0 = 0x51, }; struct mlx5_ifc_alloc_modify_header_context_out_bits { From f53297d67800feb5fafd94abd926c889aefee690 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:47:52 +0000 Subject: [PATCH 21/32] net/mlx5: Get vport ACL namespace by vport index The ingress and egress ACL root namespaces are created per vport and stored into arrays. However, the vport number is not the same as the index. Passing the array index, instead of vport number, to get the correct ingress and egress acl namespace. Fixes: 9b93ab981e3b ("net/mlx5: Separate ingress/egress namespaces for each vport") Signed-off-by: Jianbo Liu Reviewed-by: Oz Shlomo Reviewed-by: Eli Britstein Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 12010f85fa35..a42a23e505df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -939,7 +939,7 @@ int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; @@ -1057,7 +1057,7 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; From 84b0d6a7a11ec976da347d50de9d5a556743de16 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:47:54 +0000 Subject: [PATCH 22/32] net/mlx5: Support allocating modify header context from ingress ACL That modify header action can be then attached to a steering rule in the ingress ACL. Signed-off-by: Jianbo Liu Reviewed-by: Eli Britstein Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index bb24c3797218..4f1d402926f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -771,6 +771,10 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); table_type = FS_FT_NIC_TX; break; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions); + table_type = FS_FT_ESW_INGRESS_ACL; + break; default: return -EOPNOTSUPP; } From 91d6291c4e544408e90e606bbaace88923d84167 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 25 Jun 2019 17:47:56 +0000 Subject: [PATCH 23/32] net/mlx5: Introduce a helper API to check VF vport Introduce a helper API mlx5_eswitch_is_vf_vport() to check if a given vport_num belongs to VF or not. Signed-off-by: Parav Pandit Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 8b9f2cf58e91..99dc25630629 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -508,6 +508,8 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 17abb98b48af..c1c42c1370b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2290,3 +2290,9 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, return mlx5_eswitch_get_rep(esw, vport); } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); + +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num >= MLX5_VPORT_FIRST_VF && + vport_num <= esw->dev->priv.sriov.max_vfs; +} From bb0ee7dcc4ecd6af39823b80ae3995ddc119c373 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:47:58 +0000 Subject: [PATCH 24/32] net/mlx5: Add flow context for flow tag Refactor the flow data structures, add new flow_context and move flow_tag into it, as flow_tag doesn't belong to the rule action. Signed-off-by: Jianbo Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/flow.c | 13 ++++--- drivers/infiniband/hw/mlx5/main.c | 30 ++++++++++------ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + .../mellanox/mlx5/core/diag/fs_tracepoint.h | 2 +- .../mellanox/mlx5/core/en_fs_ethtool.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 7 ++-- .../ethernet/mellanox/mlx5/core/fpga/ipsec.c | 8 +++-- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 3 +- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 34 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 1 + include/linux/mlx5/fs.h | 15 +++++--- 11 files changed, 71 insertions(+), 45 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 1fc302d41a53..b8841355fcd5 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -65,11 +65,12 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct uverbs_attr_bundle *attrs) { - struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; + struct mlx5_flow_context flow_context = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_ib_flow_handler *flow_handler; struct mlx5_ib_flow_matcher *fs_matcher; struct ib_uobject **arr_flow_actions; struct ib_uflow_resources *uflow_res; + struct mlx5_flow_act flow_act = {}; void *devx_obj; int dest_id, dest_type; void *cmd_in; @@ -172,17 +173,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( arr_flow_actions[i]->object); } - ret = uverbs_copy_from(&flow_act.flow_tag, attrs, + ret = uverbs_copy_from(&flow_context.flow_tag, attrs, MLX5_IB_ATTR_CREATE_FLOW_TAG); if (!ret) { - if (flow_act.flow_tag >= BIT(24)) { + if (flow_context.flow_tag >= BIT(24)) { ret = -EINVAL; goto err_out; } - flow_act.flags |= FLOW_ACT_HAS_TAG; + flow_context.flags |= FLOW_CONTEXT_HAS_TAG; } - flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act, + flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, + &flow_context, + &flow_act, counter_id, cmd_in, inlen, dest_id, dest_type); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index abac70ad5c7c..be4c9a687df7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2666,11 +2666,15 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, } } -static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, - u32 *match_v, const union ib_flow_spec *ib_spec, +static int parse_flow_attr(struct mlx5_core_dev *mdev, + struct mlx5_flow_spec *spec, + const union ib_flow_spec *ib_spec, const struct ib_flow_attr *flow_attr, struct mlx5_flow_act *action, u32 prev_type) { + struct mlx5_flow_context *flow_context = &spec->flow_context; + u32 *match_c = spec->match_criteria; + u32 *match_v = spec->match_value; void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, @@ -2989,8 +2993,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, if (ib_spec->flow_tag.tag_id >= BIT(24)) return -EINVAL; - action->flow_tag = ib_spec->flow_tag.tag_id; - action->flags |= FLOW_ACT_HAS_TAG; + flow_context->flow_tag = ib_spec->flow_tag.tag_id; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; break; case IB_FLOW_SPEC_ACTION_DROP: if (FIELDS_NOT_SUPPORTED(ib_spec->drop, @@ -3084,7 +3088,8 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, return VALID_SPEC_NA; return is_crypto && is_ipsec && - (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ? + (!egress || (!is_drop && + !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? VALID_SPEC_VALID : VALID_SPEC_INVALID; } @@ -3473,7 +3478,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; + struct mlx5_flow_act flow_act = {}; struct mlx5_flow_spec *spec; struct mlx5_flow_destination dest_arr[2] = {}; struct mlx5_flow_destination *rule_dst = dest_arr; @@ -3504,8 +3509,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - err = parse_flow_attr(dev->mdev, spec->match_criteria, - spec->match_value, + err = parse_flow_attr(dev->mdev, spec, ib_flow, flow_attr, &flow_act, prev_type); if (err < 0) @@ -3572,11 +3576,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } - if ((flow_act.flags & FLOW_ACT_HAS_TAG) && + if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) && (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", - flow_act.flow_tag, flow_attr->type); + spec->flow_context.flow_tag, flow_attr->type); err = -EINVAL; goto free; } @@ -3947,6 +3951,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, struct mlx5_flow_destination *dst, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, void *cmd_in, int inlen, int dst_num) @@ -3969,6 +3974,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, fs_matcher->mask_len); spec->match_criteria_enable = fs_matcher->match_criteria_enable; + spec->flow_context = *flow_context; handler->rule = mlx5_add_flow_rules(ft, spec, flow_act, dst, dst_num); @@ -4033,6 +4039,7 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_ib_flow_handler * mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, u32 counter_id, void *cmd_in, int inlen, int dest_id, @@ -4085,7 +4092,8 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, dst_num++; } - handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act, + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, + flow_context, flow_act, cmd_in, inlen, dst_num); if (IS_ERR(handler)) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a043af7ee366..1c205c2bd486 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1317,6 +1317,7 @@ extern const struct uapi_definition mlx5_ib_devx_defs[]; extern const struct uapi_definition mlx5_ib_flow_defs[]; struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index a4cf123e3f17..9ec46edf22a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -204,7 +204,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __entry->index = fte->index; __entry->action = fte->action.action; __entry->mask_enable = __entry->fg->mask.match_criteria_enable; - __entry->flow_tag = fte->action.flow_tag; + __entry->flow_tag = fte->flow_context.flow_tag; memcpy(__entry->mask_outer, MLX5_ADDR_OF(fte_match_param, &__entry->fg->mask.match_criteria, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 4421c10f58ae..839662644ed3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -426,7 +426,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); - flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0); if (IS_ERR(rule)) { err = PTR_ERR(rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 122f457091a2..8ff1ca46d8d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -716,19 +716,22 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context; struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .flow_tag = attr->flow_tag, .reformat_id = 0, - .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND, + .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; bool table_created = false; int err, dest_ix = 0; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = attr->flow_tag; + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 52c47d3dd5a5..c76da309506b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -636,7 +636,8 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, u8 match_criteria_enable, const u32 *match_c, const u32 *match_v, - struct mlx5_flow_act *flow_act) + struct mlx5_flow_act *flow_act, + struct mlx5_flow_context *flow_context) { const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); @@ -655,7 +656,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, (match_criteria_enable & ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - (flow_act->flags & FLOW_ACT_HAS_TAG)) + (flow_context->flags & FLOW_CONTEXT_HAS_TAG)) return false; return true; @@ -767,7 +768,8 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, fg->mask.match_criteria_enable, fg->mask.match_criteria, fte->val, - &fte->action)) + &fte->action, + &fte->flow_context)) return ERR_PTR(-EINVAL); else if (!mlx5_is_fpga_ipsec_rule(mdev, fg->mask.match_criteria_enable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 4f1d402926f1..fb1335a433ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -396,7 +396,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); MLX5_SET(flow_context, in_flow_context, group_id, group_id); - MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); if (extended_dest) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fb5b61727ee7..9f5544ac6b8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -584,7 +584,7 @@ static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) } static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, - u32 *match_value, + struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act) { struct mlx5_flow_steering *steering = get_steering(&ft->node); @@ -594,9 +594,10 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, if (!fte) return ERR_PTR(-ENOMEM); - memcpy(fte->val, match_value, sizeof(fte->val)); + memcpy(fte->val, &spec->match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; fte->action = *flow_act; + fte->flow_context = spec->flow_context; tree_init_node(&fte->node, NULL, del_sw_fte); @@ -1428,7 +1429,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2) return false; } -static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +static int check_conflicting_ftes(struct fs_fte *fte, + const struct mlx5_flow_context *flow_context, + const struct mlx5_flow_act *flow_act) { if (check_conflicting_actions(flow_act->action, fte->action.action)) { mlx5_core_warn(get_dev(&fte->node), @@ -1436,12 +1439,12 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act return -EEXIST; } - if ((flow_act->flags & FLOW_ACT_HAS_TAG) && - fte->action.flow_tag != flow_act->flow_tag) { + if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) && + fte->flow_context.flow_tag != flow_context->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u already exists with different flow tag %u\n", - fte->action.flow_tag, - flow_act->flow_tag); + fte->flow_context.flow_tag, + flow_context->flow_tag); return -EEXIST; } @@ -1449,7 +1452,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act } static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, - u32 *match_value, + struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1460,7 +1463,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, int i; int ret; - ret = check_conflicting_ftes(fte, flow_act); + ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act); if (ret) return ERR_PTR(ret); @@ -1635,7 +1638,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, u64 version; int err; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) return ERR_PTR(-ENOMEM); @@ -1651,8 +1654,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, fte_tmp = lookup_fte_locked(g, spec->match_value, take_write); if (!fte_tmp) continue; - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte_tmp); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node, false); tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); @@ -1699,8 +1701,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); return rule; @@ -1786,7 +1787,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (err) goto err_release_fg; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) { err = PTR_ERR(fte); goto err_release_fg; @@ -1800,8 +1801,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, flow_act, dest, - dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); tree_put_node(&g->node, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index a08c3d09a50f..c48c382f926f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -170,6 +170,7 @@ struct fs_fte { u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 index; + struct mlx5_flow_context flow_context; struct mlx5_flow_act action; enum fs_fte_status status; struct mlx5_fc *counter; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 2ddaa97f2179..9bf49ce218fa 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -88,10 +88,20 @@ struct mlx5_flow_group; struct mlx5_flow_namespace; struct mlx5_flow_handle; +enum { + FLOW_CONTEXT_HAS_TAG = BIT(0), +}; + +struct mlx5_flow_context { + u32 flags; + u32 flow_tag; +}; + struct mlx5_flow_spec { u8 match_criteria_enable; u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; u32 match_value[MLX5_ST_SZ_DW(fte_match_param)]; + struct mlx5_flow_context flow_context; }; enum { @@ -173,13 +183,11 @@ struct mlx5_fs_vlan { #define MLX5_FS_VLAN_DEPTH 2 enum { - FLOW_ACT_HAS_TAG = BIT(0), - FLOW_ACT_NO_APPEND = BIT(1), + FLOW_ACT_NO_APPEND = BIT(0), }; struct mlx5_flow_act { u32 action; - u32 flow_tag; u32 reformat_id; u32 modify_id; uintptr_t esp_id; @@ -190,7 +198,6 @@ struct mlx5_flow_act { #define MLX5_DECLARE_FLOW_ACT(name) \ struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \ .reformat_id = 0, \ .modify_id = 0, \ .flags = 0, } From 7445cfb1169cebf8f79763acf65f85d850850461 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:48:00 +0000 Subject: [PATCH 25/32] net/mlx5: E-Switch, Tag packet with vport number in VF vports and uplink ingress ACLs When a dual-port VHCA sends a RoCE packet on its non-native port, and the packet arrives to its affiliated vport FDB, a mismatch might occur on the rules that match the packet source vport as it is not represented by single VHCA only in this case. So we change to match on metadata instead of source vport. To do that, a rule is created in all vports and uplink ingress ACLs, to save the source vport number and vhca id in the packet's metadata in order to match on it later. The metadata register used is the first of the 32-bit type C registers. It can be used for matching and header modify operations. The higher 16 bits of this register are for vhca id, and the lower 16 ones is for vport number. This change is not for dual-port RoCE only. If HW and FW allow, the vport metadata matching is enabled by default. Signed-off-by: Jianbo Liu Reviewed-by: Eli Britstein Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 2 + .../net/ethernet/mellanox/mlx5/core/eswitch.h | 9 + .../mellanox/mlx5/core/eswitch_offloads.c | 180 ++++++++++++++---- include/linux/mlx5/eswitch.h | 17 ++ 4 files changed, 172 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a42a23e505df..1235fd84ae3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1168,6 +1168,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, vport->ingress.drop_rule = NULL; vport->ingress.allow_rule = NULL; + + esw_vport_del_ingress_acl_modify_metadata(esw, vport); } void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 99dc25630629..51e71b824abf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -68,6 +68,8 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; + int modify_metadata_id; + struct mlx5_flow_handle *modify_metadata_rule; struct mlx5_flow_handle *allow_rule; struct mlx5_flow_handle *drop_rule; struct mlx5_fc *drop_counter; @@ -196,6 +198,10 @@ struct mlx5_esw_functions { u16 num_vfs; }; +enum { + MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -203,6 +209,7 @@ struct mlx5_eswitch { struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; struct mlx5_vport *vports; + u32 flags; int total_vports; int enabled_vports; /* Synchronize between vport change events @@ -240,6 +247,8 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c1c42c1370b8..4bcbc872cd08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1555,32 +1555,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; /* For prio tag mode, there is only 1 FTEs: - * 1) Untagged packets - push prio tag VLAN, allow + * 1) Untagged packets - push prio tag VLAN and modify metadata if + * required, allow * Unmatched traffic is allowed by default */ - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) - return -EOPNOTSUPP; - - esw_vport_cleanup_ingress_rules(esw, vport); - - err = esw_vport_enable_ingress_acl(esw, vport); - if (err) { - mlx5_core_warn(esw->dev, - "failed to enable prio tag ingress acl (%d) on vport[%d]\n", - err, vport->vport); - return err; - } - - esw_debug(esw->dev, - "vport[%d] configure ingress rules\n", vport->vport); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { err = -ENOMEM; @@ -1596,6 +1580,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, flow_act.vlan[0].ethtype = ETH_P_8021Q; flow_act.vlan[0].vid = 0; flow_act.vlan[0].prio = 0; + + if (vport->ingress.modify_metadata_rule) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_id = vport->ingress.modify_metadata_id; + } + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, &flow_act, NULL, 0); @@ -1616,6 +1606,58 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, return err; } +static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec spec = {}; + int err = 0; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + + err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action, &vport->ingress.modify_metadata_id); + if (err) { + esw_warn(esw->dev, + "failed to alloc modify header for vport %d ingress acl (%d)\n", + vport->vport, err); + return err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.modify_id = vport->ingress.modify_metadata_id; + vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, + &spec, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.modify_metadata_rule); + esw_warn(esw->dev, + "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", + vport->vport, err); + vport->ingress.modify_metadata_rule = NULL; + goto out; + } + +out: + if (err) + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + return err; +} + +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (vport->ingress.modify_metadata_rule) { + mlx5_del_flow_rules(vport->ingress.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + + vport->ingress.modify_metadata_rule = NULL; + } +} + static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1623,6 +1665,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec; int err = 0; + if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + /* For prio tag mode, there is only 1 FTEs: * 1) prio tag packets - pop the prio tag VLAN, allow * Unmatched traffic is allowed by default @@ -1676,27 +1721,75 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, return err; } -static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports) +static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = NULL; + int err; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + esw_vport_cleanup_ingress_rules(esw, vport); + + err = esw_vport_enable_ingress_acl(esw, vport); + if (err) { + esw_warn(esw->dev, + "failed to enable ingress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } + + esw_debug(esw->dev, + "vport[%d] configure ingress rules\n", vport->vport); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = esw_vport_add_ingress_acl_modify_metadata(esw, vport); + if (err) + goto out; + } + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + err = esw_vport_ingress_prio_tag_config(esw, vport); + if (err) + goto out; + } + +out: + if (err) + esw_vport_disable_ingress_acl(esw, vport); + return err; +} + +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; int i, j; int err; - mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) { - err = esw_vport_ingress_prio_tag_config(esw, vport); + mlx5_esw_for_all_vports(esw, i, vport) { + err = esw_vport_ingress_common_config(esw, vport); if (err) goto err_ingress; - err = esw_vport_egress_prio_tag_config(esw, vport); - if (err) - goto err_egress; + + if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + err = esw_vport_egress_prio_tag_config(esw, vport); + if (err) + goto err_egress; + } } + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + esw_info(esw->dev, "Use metadata reg_c as source vport to match\n"); + return 0; err_egress: esw_vport_disable_ingress_acl(esw, vport); err_ingress: - mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) { + for (j = MLX5_VPORT_PF; j < i; j++) { + vport = &esw->vports[j]; esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1704,15 +1797,17 @@ static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports) return err; } -static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; int i; - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->nvports) { + mlx5_esw_for_all_vports(esw, i, vport) { esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } + + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) @@ -1722,15 +1817,13 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { - err = esw_prio_tag_acls_config(esw, nvports); - if (err) - return err; - } + err = esw_create_offloads_acl_tables(esw); + if (err) + return err; err = esw_create_offloads_fdb_tables(esw, nvports); if (err) - return err; + goto create_fdb_err; err = esw_create_offloads_table(esw, nvports); if (err) @@ -1748,6 +1841,9 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) create_ft_err: esw_destroy_offloads_fdb_tables(esw); +create_fdb_err: + esw_destroy_offloads_acl_tables(esw); + return err; } @@ -1756,8 +1852,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) - esw_prio_tag_acls_cleanup(esw); + esw_destroy_offloads_acl_tables(esw); } static void esw_functions_changed_event_handler(struct work_struct *work) @@ -2296,3 +2391,16 @@ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) return vport_num >= MLX5_VPORT_FIRST_VF && vport_num <= esw->dev->priv.sriov.max_vfs; } + +bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); +} +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); + +u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + u16 vport_num) +{ + return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 174eec0871d9..aece3ae1902d 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -67,11 +67,28 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, #ifdef CONFIG_MLX5_ESWITCH enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); + +bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); +u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + u16 vport_num); #else /* CONFIG_MLX5_ESWITCH */ static inline enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) { return DEVLINK_ESWITCH_ENCAP_MODE_NONE; } + +static inline bool +mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) +{ + return false; +}; + +static inline u32 +mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + int vport_num) +{ + return 0; +}; #endif /* CONFIG_MLX5_ESWITCH */ #endif From 8d212ff057f8b81ed6ed418874b54ded3bf97ad4 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:48:02 +0000 Subject: [PATCH 26/32] net/mlx5e: Specifying known origin of packets matching the flow In vport metadata matching, source port number is replaced by metadata. While FW has no idea about what it is in the metadata, a syndrome will happen. Specify a known origin to avoid the syndrome. However, there is no functional change because ANY_VPORT (0) is filled in flow_source, the same default value as before, as a pre-step towards metadata matching for fast path. There are two other values can be filled in flow_source. When setting 0x1, packet matching this rule is from uplink, while 0x2 is for packet from other local vports. Signed-off-by: Jianbo Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 3 +++ include/linux/mlx5/fs.h | 1 + 3 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index 9ec46edf22a6..ddf1b87f1bc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -187,6 +187,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __field(u32, index) __field(u32, action) __field(u32, flow_tag) + __field(u32, flow_source) __field(u8, mask_enable) __field(int, new_fte) __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) @@ -205,6 +206,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __entry->action = fte->action.action; __entry->mask_enable = __entry->fg->mask.match_criteria_enable; __entry->flow_tag = fte->flow_context.flow_tag; + __entry->flow_source = fte->flow_context.flow_source; memcpy(__entry->mask_outer, MLX5_ADDR_OF(fte_match_param, &__entry->fg->mask.match_criteria, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index fb1335a433ae..7ac1249eadc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -398,6 +398,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); if (extended_dest) { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 9bf49ce218fa..dc7e7aa53a13 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -95,6 +95,7 @@ enum { struct mlx5_flow_context { u32 flags; u32 flow_tag; + u32 flow_source; }; struct mlx5_flow_spec { From c01cfd0f111511f005ac9c2608556a02b012a2dc Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:48:04 +0000 Subject: [PATCH 27/32] net/mlx5: E-Switch, Add match on vport metadata for rule in fast path If FW's capabilities and configurations meet the requirement of vport metadata matching, this feature will be used. As the information about vport number and vhca_id related to packet is already stored to its metadata register, which is used as an indicator for perticular vport, now we can change to match on this metadata for all the offloading rules in fast path. Signed-off-by: Jianbo Liu Reviewed-by: Eli Britstein Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 85 +++++++++++-------- 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4bcbc872cd08..a3cf787382ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -88,6 +88,53 @@ u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) return 1; } +static void +mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + void *misc2; + void *misc; + + /* Use metadata matching because vport is not represented by single + * VHCA in dual-port RoCE mode, and matching on source vport may fail. + */ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch, + attr->in_rep->vport)); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc))) + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } + + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && + attr->in_rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@ -99,7 +146,6 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int j, i = 0; - void *misc; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); @@ -159,21 +205,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, i++; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + mlx5_eswitch_set_rule_source_port(esw, spec, attr); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); - - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); - - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { if (attr->tunnel_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; @@ -219,7 +252,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_table *fast_fdb; struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; - void *misc; int i; fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); @@ -251,25 +283,10 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].ft = fwd_fdb, i++; - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + mlx5_eswitch_set_rule_source_port(esw, spec, attr); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); - - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); - - if (attr->match_level == MLX5_MATCH_NONE) - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - else - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS; + if (attr->match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); From 57843868700162922963baa69fc049a0256aeed2 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:48:05 +0000 Subject: [PATCH 28/32] net/mlx5: E-Switch, Add query and modify esw vport context functions Add esw vport query and modify functions, and exposing them is needed for enabling or disabling registers passed as metatdata to vport NIC_RX table in slow path. Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 24 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 ++++ 2 files changed, 29 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1235fd84ae3a..67598272d4a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -134,6 +134,30 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen) +{ + return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen); +} + +static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen) +{ + return query_esw_vport_context_cmd(esw->dev, vport, out, outlen); +} + static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 51e71b824abf..335cbeee1b9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -276,6 +276,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen); +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen); + struct mlx5_flow_spec; struct mlx5_esw_flow_attr; From c1286050cf47170fbce7edc3787ab577a882863b Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:48:07 +0000 Subject: [PATCH 29/32] net/mlx5: E-Switch, Pass metadata from FDB to eswitch manager In order to do matching on metadata in slow path when demuxing traffic to representors, explicitly enable the feature that allows HW to pass metadata REG_C_0 from FDB to eswitch manager NIC_RX table. Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a3cf787382ee..178ff9b05258 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -574,6 +574,59 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } +static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + +static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, struct mlx5_flow_spec *spec, struct mlx5_flow_destination *dest) @@ -1977,6 +2030,12 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, if (err) return err; + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = mlx5_eswitch_enable_passing_vport_metadata(esw); + if (err) + goto err_vport_metadata; + } + /* Only load special vports reps. VF reps will be loaded in * context of functions_changed event handler through real * or emulated event. @@ -2004,6 +2063,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, return 0; err_reps: + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); +err_vport_metadata: esw_offloads_steering_cleanup(esw); return err; } @@ -2033,6 +2095,8 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); esw_offloads_unload_all_reps(esw, esw->esw_funcs.num_vfs); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); esw_offloads_steering_cleanup(esw); } From a5641cb524cd023c5fafbe41891c8ec510b65f3b Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:48:09 +0000 Subject: [PATCH 30/32] net/mlx5: E-Switch, Add match on vport metadata for rule in slow path In slow path, packet that not matched by any offloaded rule is forwarded to eswitch vport manager for further processing. Add matching on metadata for peer miss rules in FDB, and rules which forward packet to correct representor in esw manager NIC_RX table. Signed-off-by: Jianbo Liu Reviewed-by: Eli Britstein Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 142 +++++++++++++----- 1 file changed, 107 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 178ff9b05258..94b55d0bdda9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -627,23 +627,34 @@ static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) in, sizeof(in)); } -static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, +static void peer_miss_rules_setup(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev, struct mlx5_flow_spec *spec, struct mlx5_flow_destination *dest) { - void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); + void *misc; - MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(peer_dev, vhca_id)); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + } dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest->vport.num = peer_dev->priv.eswitch->manager_vport; @@ -651,6 +662,26 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } +static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + struct mlx5_flow_spec *spec, + u16 vport) +{ + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(peer_esw, + vport)); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + } +} + static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { @@ -668,7 +699,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, if (!spec) return -ENOMEM; - peer_miss_rules_setup(peer_dev, spec, &dest); + peer_miss_rules_setup(esw, peer_dev, spec, &dest); flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL); if (!flows) { @@ -681,7 +712,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc_parameters); if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -703,7 +736,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, } mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, i); + esw_set_peer_miss_rule_source_port(esw, + peer_dev->priv.eswitch, + spec, i); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -987,6 +1023,30 @@ static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 +static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in) +{ + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0); + } else { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + } +} + static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1084,19 +1144,21 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) /* create peer esw miss group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, - match_criteria); + esw_set_flow_group_source_port(esw, flow_group_in); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_port); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_eswitch_owner_vhca_id); + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } - MLX5_SET(create_flow_group_in, flow_group_in, - source_eswitch_owner_vhca_id_valid, 1); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + esw->total_vports - 1); @@ -1210,7 +1272,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; u32 *flow_group_in; - void *match_criteria, *misc; int err = 0; nvports = nvports + MLX5_ESW_MISS_FLOWS; @@ -1220,12 +1281,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) /* create vport rx group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + esw_set_flow_group_source_port(esw, flow_group_in); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); @@ -1264,13 +1321,24 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, goto out; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, vport); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, @@ -1557,6 +1625,10 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != + mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) + break; + err = mlx5_esw_offloads_pair(esw, peer_esw); if (err) goto err_out; From 669ff1e32f334e6a09e523db94989b96da4a4710 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:48:12 +0000 Subject: [PATCH 31/32] RDMA/mlx5: Add vport metadata matching for IB representors If vport metadata matching is enabled in eswitch, the rule created must be changed to match on the metadata, instead of source port. Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 45 ++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index be4c9a687df7..602ac3feea5d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3469,6 +3469,37 @@ static int flow_counters_set_data(struct ib_counters *ibcounters, return ret; } +static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, + struct mlx5_flow_spec *spec, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, + rep->vport)); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + } +} + static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, const struct ib_flow_attr *flow_attr, @@ -3523,19 +3554,15 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, set_underlay_qp(dev, spec, underlay_qpn); if (dev->is_rep) { - void *misc; + struct mlx5_eswitch_rep *rep; - if (!dev->port[flow_attr->port - 1].rep) { + rep = dev->port[flow_attr->port - 1].rep; + if (!rep) { err = -EINVAL; goto free; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, - dev->port[flow_attr->port - 1].rep->vport); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + mlx5_ib_set_rule_source_port(dev, spec, rep); } spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); From 92ab1eb392c6ac6f7fdeee4bfdfb39aa860a371f Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 25 Jun 2019 17:48:14 +0000 Subject: [PATCH 32/32] net/mlx5: E-Switch, Enable vport metadata matching if firmware supports it As the ingress ACL rules save vhca id and vport number to packet's metadata REG_C_0, and the metadata matching for the rules in both fast path and slow path are all added, enable this feature if supported. Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 94b55d0bdda9..174b0ec4162f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1904,12 +1904,35 @@ static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw, return err; } +static bool +esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) +{ + if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl)) + return false; + + if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_0)) + return false; + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) + return false; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev) || + mlx5_ecpf_vport_exists(esw->dev)) + return false; + + return true; +} + static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; int i, j; int err; + if (esw_check_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + mlx5_esw_for_all_vports(esw, i, vport) { err = esw_vport_ingress_common_config(esw, vport); if (err)