From 86f5d0f3d4995b8b8909da7eea235e5bc1ceefbe Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 2 Mar 2020 16:15:19 -0800 Subject: [PATCH 01/16] net/mlx5: Introduce egress acl forward-to-vport capability Add the HCA_CAP.egress_acl_forward_to_vport field to check whether HW supports an e-switch vport's egress ACL forwarding packets to another e-switch vport. By default, the E-Switch egress ACL forwards e-switch vports' egress packets to their corresponding NIC/VF vports. With this cap enabled, the driver is allowed to alter this behavior and forward packets to arbitrary NIC/VF vports with the following limitations:
a. Multiple processing paths are supported if all of the following conditions are met:
   - HCA_CAP.egress_acl_forward_to_vport is set to 1.
   - A destination of type Flow Table only appears once, as the last destination in the list.
   - Vport destination is supported if HCA_CAP.egress_acl_forward_to_vport == 1. Vport must not be the Uplink.
b. Flow_tag is not supported.
c. This table is only applicable after an FDB table is created.
d. Push VLAN action is not supported.
e. Pop VLAN action cannot be added concurrently to this table and the FDB table.
This feature will be used during port failover in a bonding scenario where two VF representors are bonded to handle failover egress traffic (the VM's ingress/receive traffic). Signed-off-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7d89ab64b372..07be46e713fc 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -738,7 +738,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 flow_source[0x1]; u8 reserved_at_18[0x2]; u8 multi_fdb_encap[0x1]; - u8 reserved_at_1b[0x1]; + u8 egress_acl_forward_to_vport[0x1]; u8 fdb_multi_path_to_table[0x1]; u8 reserved_at_1d[0x3]; From bd673da6d933e3c8f652b011afba6cee0f8bbe45 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 2 Mar 2020 16:15:20 -0800 Subject: [PATCH 02/16] net/mlx5: Introduce TLS and IPSec objects enums Expose the TLS encryption key general object type enum correctly, and add the IPSec encryption key general object type enum.
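To make the distinction concrete, here is a minimal sketch of how a consumer selects between the two key types when building the ENCRYPTION_KEY general object create command; it mirrors the crypto.c hunk below, with all buffer allocation and setup elided:

  /* Sketch only: 'obj' points at the encryption_key_obj payload inside
   * the create command, as in mlx5_create_encryption_key() below.
   */
  MLX5_SET(encryption_key_obj, obj, key_type,
           MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS);
  /* an IPSec user of the same object type would instead set: */
  MLX5_SET(encryption_key_obj, obj, key_type,
           MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC);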
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c | 2 +- include/linux/mlx5/mlx5_ifc.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c index 3fc575d1c3ec..dcea87ec5977 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -42,7 +42,7 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size); MLX5_SET(encryption_key_obj, obj, key_type, - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK); + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS); MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 07be46e713fc..2e98bba12356 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10489,7 +10489,8 @@ enum { }; enum { - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK = 0x1, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS = 0x1, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC = 0x2, }; struct mlx5_ifc_tls_static_params_bits { From dc392fc56f39a00a46d6db2d150571ccafe99734 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 2 Mar 2020 16:15:21 -0800 Subject: [PATCH 03/16] net/mlx5: Expose link speed directly Expose port rate as part of the port speed register fields. Signed-off-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2e98bba12356..d0a678c82ccd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8423,7 +8423,8 @@ struct mlx5_ifc_ptys_reg_bits { u8 proto_mask[0x3]; u8 an_status[0x4]; - u8 reserved_at_24[0x1c]; + u8 reserved_at_24[0xc]; + u8 data_rate_oper[0x10]; u8 ext_eth_proto_capability[0x20]; From e0ebd8eb36ed850a22a9a0ca83edc4a40ad67c16 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 2 Mar 2020 16:15:22 -0800 Subject: [PATCH 04/16] net/mlx5: HW bit for goto chain offload support Add the HW bit definition indicating goto chain offload support. Signed-off-by: Eli Cohen Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d0a678c82ccd..9b8ff4e57002 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -414,7 +414,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_16[0x1]; u8 table_miss_action_domain[0x1]; u8 termination_table[0x1]; - u8 reserved_at_19[0x7]; + u8 reformat_and_fwd_to_table[0x1]; + u8 reserved_at_1a[0x6]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; From 54c62e13ad765a346d220b2566f84c6092cf3564 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 10 Mar 2020 10:22:27 +0200 Subject: [PATCH 05/16] {IB,net}/mlx5: Setup mkey variant before mr create command invocation On reg_mr_callback(), mlx5_ib recalculates the mkey variant, which is wrong and will lead to using a different key variant than the one submitted to firmware on create mkey command invocation.
To fix this, we store the mkey variant before invoking the firmware command and use it later on completion (reg_mr_callback). Signed-off-by: Saeed Mahameed Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 7 ++----- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 3 ++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6fa0a83c19de..45c3282dd5e1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -87,7 +87,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) struct mlx5_mr_cache *cache = &dev->cache; int c = order2idx(dev, mr->order); struct mlx5_cache_ent *ent = &cache->ent[c]; - u8 key; unsigned long flags; spin_lock_irqsave(&ent->lock, flags); @@ -102,10 +101,8 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) } mr->mmkey.type = MLX5_MKEY_MR; - spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); - key = dev->mdev->priv.mkey_key++; - spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); - mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key; + mr->mmkey.key |= mlx5_idx_to_mkey( + MLX5_GET(create_mkey_out, mr->out, mkey_index)); cache->last_add = jiffies; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 42cc3c7ac5b6..770d13bb4f20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -56,6 +56,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); MLX5_SET(mkc, mkc, mkey_7_0, key); + mkey->key = key; if (callback) return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, @@ -68,7 +69,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); mkey->iova = MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); - mkey->key = mlx5_idx_to_mkey(mkey_index) | key; + mkey->key |= mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", From fc6a9f86f08acd3665f788619afae0d2b2d5a480 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 10 Mar 2020 10:22:28 +0200 Subject: [PATCH 06/16] {IB,net}/mlx5: Assign mkey variant in mlx5_ib only mkey variant is not required for mlx5_core use, move the mkey variant counter to mlx5_ib. 
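For context, the mkey value handed to consumers packs two independent halves, which is what makes it safe to pick the variant in mlx5_ib before the command and OR in the firmware index afterwards. A sketch of the composition, assuming mlx5_idx_to_mkey() is (idx << 8), as its pairing with mkey_7_0 in these patches implies:

  /* Sketch: bits 31:8 are the firmware-assigned mkey index returned in
   * create_mkey_out; bits 7:0 are the driver-chosen variant written via
   * MLX5_SET(mkc, mkc, mkey_7_0, key) before the command is issued.
   */
  static u32 compose_mkey(u32 mkey_index, u8 variant)
  {
          return (mkey_index << 8) | variant;
  }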
Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 ++ drivers/infiniband/hw/mlx5/mr.c | 58 +++++++++++++++---- .../net/ethernet/mellanox/mlx5/core/main.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/mr.c | 8 +-- include/linux/mlx5/driver.h | 4 -- 6 files changed, 55 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e4bcfa81b70a..fce863621414 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6390,6 +6390,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->reset_flow_resource_lock); xa_init(&dev->odp_mkeys); xa_init(&dev->sig_mrs); + spin_lock_init(&dev->mkey_lock); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d9bffcc93587..89a050e516a8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -992,6 +992,11 @@ struct mlx5_ib_dev { /* sync used page count stats */ struct mlx5_ib_resources devr; + + /* protect mkey key part */ + spinlock_t mkey_lock; + u8 mkey_key; + struct mlx5_mr_cache cache; struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 45c3282dd5e1..1b83d00e8ecd 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -47,6 +47,46 @@ enum { #define MLX5_UMR_ALIGN 2048 +static void +create_mkey_callback(int status, struct mlx5_async_work *context); + +static void +assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, + u32 *in) +{ + void *mkc; + u8 key; + + spin_lock_irq(&dev->mkey_lock); + key = dev->mkey_key++; + spin_unlock_irq(&dev->mkey_lock); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, mkey_7_0, key); + mkey->key = key; +} + +static int +mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, + u32 *in, int inlen) +{ + assign_mkey_variant(dev, mkey, in); + return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen); +} + +static int +mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, + struct mlx5_core_mkey *mkey, + struct mlx5_async_ctx *async_ctx, + u32 *in, int inlen, u32 *out, int outlen, + struct mlx5_async_work *context) +{ + assign_mkey_variant(dev, mkey, in); + return mlx5_core_create_mkey_cb(dev->mdev, mkey, async_ctx, + in, inlen, out, outlen, + create_mkey_callback, context); +} + static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); @@ -79,7 +119,7 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -static void reg_mr_callback(int status, struct mlx5_async_work *context) +static void create_mkey_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = container_of(context, struct mlx5_ib_mr, cb_work); @@ -160,10 +200,10 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, + err = mlx5_ib_create_mkey_cb(dev, &mr->mmkey, &dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), - reg_mr_callback, &mr->cb_work); + 
&mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -682,7 +722,6 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -704,7 +743,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) MLX5_SET(mkc, mkc, length64, 1); set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); - err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_in; @@ -1094,7 +1133,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, get_octo_len(virt_addr, length, page_shift)); } - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); goto err_2; @@ -1134,7 +1173,6 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -1157,7 +1195,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, MLX5_SET64(mkc, mkc, len, length); set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); - err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_in; @@ -1635,7 +1673,7 @@ static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_free_descs; @@ -1902,7 +1940,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); MLX5_SET(mkc, mkc, qpn, 0xffffff); - err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); if (err) goto free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f554cfddcf4e..6b38ec72215a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1282,7 +1282,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mutex_init(&priv->alloc_mutex); mutex_init(&priv->pgdir_mutex); INIT_LIST_HEAD(&priv->pgdir_list); - spin_lock_init(&priv->mkey_lock); priv->dbg_root = debugfs_create_dir(dev_name(dev->device), mlx5_debugfs_root); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 770d13bb4f20..51814d023efb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -49,14 +49,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, int err; u8 key; - spin_lock_irq(&dev->priv.mkey_lock); - key = dev->priv.mkey_key++; - spin_unlock_irq(&dev->priv.mkey_lock); - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); - MLX5_SET(mkc, mkc, mkey_7_0, key); - mkey->key = key; if (callback) return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, @@ -66,6 +59,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, if 
(err) return err; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); mkey->iova = MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f2b4225ed650..e044703c056b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -575,10 +575,6 @@ struct mlx5_priv { /* end: alloc staff */ struct dentry *dbg_root; - /* protect mkey key part */ - spinlock_t mkey_lock; - u8 mkey_key; - struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; From a3cfdd3928113012d0f2c5353277f4e27878a663 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Tue, 10 Mar 2020 10:22:30 +0200 Subject: [PATCH 07/16] {IB,net}/mlx5: Move asynchronous mkey creation to mlx5_ib As mlx5_ib is the only user of the mlx5_core_create_mkey_cb, move the logic inside mlx5_ib and cleanup the code in mlx5_core. Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 22 +++----------------- include/linux/mlx5/driver.h | 6 ------ 3 files changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1b83d00e8ecd..8508af500972 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -81,10 +81,10 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, u32 *in, int inlen, u32 *out, int outlen, struct mlx5_async_work *context) { + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); assign_mkey_variant(dev, mkey, in); - return mlx5_core_create_mkey_cb(dev->mdev, mkey, async_ctx, - in, inlen, out, outlen, - create_mkey_callback, context); + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, + create_mkey_callback, context); } static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 51814d023efb..fd3e6d217c3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,12 +36,9 @@ #include #include "mlx5_core.h" -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_async_ctx *async_ctx, u32 *in, - int inlen, u32 *out, int outlen, - mlx5_async_cbk_t callback, - struct mlx5_async_work *context) +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen) { u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; u32 mkey_index; @@ -51,10 +48,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); - if (callback) - return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, - callback, context); - err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); if (err) return err; @@ -70,15 +63,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mkey_index, key, mkey->key); return 0; } -EXPORT_SYMBOL(mlx5_core_create_mkey_cb); - -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - u32 *in, int inlen) -{ - return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, - NULL, 0, NULL, NULL); -} EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e044703c056b..1de78f001d26 
100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -943,12 +943,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_async_ctx *async_ctx, u32 *in, - int inlen, u32 *out, int outlen, - mlx5_async_cbk_t callback, - struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen); From f743ff3b37dfc8e8cf4a32d4254cddd5f0f1add8 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 10 Mar 2020 10:22:29 +0200 Subject: [PATCH 08/16] RDMA/mlx5: Replace spinlock protected write with atomic var mkey variant calculation was spinlock protected to make it atomic, replace that with one atomic variable. Link: https://lore.kernel.org/r/20200310082238.239865-4-leon@kernel.org Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +---- drivers/infiniband/hw/mlx5/mr.c | 6 +----- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fce863621414..e4f8bee486c2 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6390,7 +6390,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->reset_flow_resource_lock); xa_init(&dev->odp_mkeys); xa_init(&dev->sig_mrs); - spin_lock_init(&dev->mkey_lock); + atomic_set(&dev->mkey_var, 0); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 89a050e516a8..3445402b23cc 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -993,10 +993,7 @@ struct mlx5_ib_dev { */ struct mlx5_ib_resources devr; - /* protect mkey key part */ - spinlock_t mkey_lock; - u8 mkey_key; - + atomic_t mkey_var; struct mlx5_mr_cache cache; struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8508af500972..a1e6ab9b0bed 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -54,12 +54,8 @@ static void assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, u32 *in) { + u8 key = atomic_inc_return(&dev->mkey_var); void *mkc; - u8 key; - - spin_lock_irq(&dev->mkey_lock); - key = dev->mkey_key++; - spin_unlock_irq(&dev->mkey_lock); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, mkey_7_0, key); From 7c8691a396bd2084c7abc02d7aa34dade597814d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Mar 2020 10:22:31 +0200 Subject: [PATCH 09/16] RDMA/mlx5: Rename the tracking variables for the MR cache The old names do not clearly indicate the intent. 
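As a reader's aid, the relationships the new names encode (restating the comment this patch adds to struct mlx5_cache_ent; the helper is purely illustrative and not part of the patch):

  /* available_mrs: MRs on ent->head, free for immediate allocation.
   * total_mrs:     available_mrs plus in-use MRs that could still be
   *                returned to the cache.
   * limit:         low water mark for available_mrs; 2 * limit is the
   *                high water mark.
   */
  static u32 ent_mrs_in_use(struct mlx5_cache_ent *ent)
  {
          return ent->total_mrs - ent->available_mrs;
  }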
Link: https://lore.kernel.org/r/20200310082238.239865-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 19 +++++++--- drivers/infiniband/hw/mlx5/mr.c | 54 ++++++++++++++-------------- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 3445402b23cc..731b0f7bbe5b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -699,15 +699,26 @@ struct mlx5_cache_ent { u32 access_mode; u32 page; - u32 size; - u32 cur; + /* + * - available_mrs is the length of list head, ie the number of MRs + * available for immediate allocation. + * - total_mrs is available_mrs plus all in use MRs that could be + * returned to the cache. + * - limit is the low water mark for available_mrs, 2* limit is the + * upper water mark. + * - pending is the number of MRs currently being created + */ + u32 total_mrs; + u32 available_mrs; + u32 limit; + u32 pending; + + /* Statistics */ u32 miss; - u32 limit; struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; - int pending; struct completion compl; }; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index a1e6ab9b0bed..9f5afa24896d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -144,8 +144,8 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) spin_lock_irqsave(&ent->lock, flags); list_add_tail(&mr->list, &ent->head); - ent->cur++; - ent->size++; + ent->available_mrs++; + ent->total_mrs++; spin_unlock_irqrestore(&ent->lock, flags); if (!completion_done(&ent->compl)) @@ -231,8 +231,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_move(&mr->list, &del_list); - ent->cur--; - ent->size--; + ent->available_mrs--; + ent->total_mrs--; spin_unlock_irq(&ent->lock); mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } @@ -265,16 +265,16 @@ static ssize_t size_write(struct file *filp, const char __user *buf, if (var < ent->limit) return -EINVAL; - if (var > ent->size) { + if (var > ent->total_mrs) { do { - err = add_keys(dev, c, var - ent->size); + err = add_keys(dev, c, var - ent->total_mrs); if (err && err != -EAGAIN) return err; usleep_range(3000, 5000); } while (err); - } else if (var < ent->size) { - remove_keys(dev, c, ent->size - var); + } else if (var < ent->total_mrs) { + remove_keys(dev, c, ent->total_mrs - var); } return count; @@ -287,7 +287,7 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); + err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs); if (err < 0) return err; @@ -320,13 +320,13 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; - if (var > ent->size) + if (var > ent->total_mrs) return -EINVAL; ent->limit = var; - if (ent->cur < ent->limit) { - err = add_keys(dev, c, 2 * ent->limit - ent->cur); + if (ent->available_mrs < ent->limit) { + err = add_keys(dev, c, 2 * ent->limit - ent->available_mrs); if (err) return err; } @@ -360,7 +360,7 @@ static int someone_adding(struct mlx5_mr_cache *cache) int i; for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - if (cache->ent[i].cur < cache->ent[i].limit) + if (cache->ent[i].available_mrs < cache->ent[i].limit) return 1; } @@ -378,9 
+378,9 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) return; ent = &dev->cache.ent[i]; - if (ent->cur < 2 * ent->limit && !dev->fill_delay) { + if (ent->available_mrs < 2 * ent->limit && !dev->fill_delay) { err = add_keys(dev, i, 1); - if (ent->cur < 2 * ent->limit) { + if (ent->available_mrs < 2 * ent->limit) { if (err == -EAGAIN) { mlx5_ib_dbg(dev, "returned eagain, order %d\n", i + 2); @@ -395,7 +395,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) queue_work(cache->wq, &ent->work); } } - } else if (ent->cur > 2 * ent->limit) { + } else if (ent->available_mrs > 2 * ent->limit) { /* * The remove_keys() logic is performed as garbage collection * task. Such task is intended to be run when no other active @@ -411,7 +411,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) if (!need_resched() && !someone_adding(cache) && time_after(jiffies, cache->last_add + 300 * HZ)) { remove_keys(dev, i, 1); - if (ent->cur > ent->limit) + if (ent->available_mrs > ent->limit) queue_work(cache->wq, &ent->work); } else { queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); @@ -462,9 +462,9 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); - ent->cur--; + ent->available_mrs--; spin_unlock_irq(&ent->lock); - if (ent->cur < ent->limit) + if (ent->available_mrs < ent->limit) queue_work(cache->wq, &ent->work); return mr; } @@ -497,9 +497,9 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); - ent->cur--; + ent->available_mrs--; spin_unlock_irq(&ent->lock); - if (ent->cur < ent->limit) + if (ent->available_mrs < ent->limit) queue_work(cache->wq, &ent->work); break; } @@ -531,7 +531,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mr->allocated_from_cache = false; destroy_mkey(dev, mr); ent = &cache->ent[c]; - if (ent->cur < ent->limit) + if (ent->available_mrs < ent->limit) queue_work(cache->wq, &ent->work); return; } @@ -539,8 +539,8 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) ent = &cache->ent[c]; spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); - ent->cur++; - if (ent->cur > 2 * ent->limit) + ent->available_mrs++; + if (ent->available_mrs > 2 * ent->limit) shrink = 1; spin_unlock_irq(&ent->lock); @@ -565,8 +565,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_move(&mr->list, &del_list); - ent->cur--; - ent->size--; + ent->available_mrs--; + ent->total_mrs--; spin_unlock_irq(&ent->lock); mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } @@ -604,7 +604,7 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) dir = debugfs_create_dir(ent->name, cache->root); debugfs_create_file("size", 0600, dir, ent, &size_fops); debugfs_create_file("limit", 0600, dir, ent, &limit_fops); - debugfs_create_u32("cur", 0400, dir, &ent->cur); + debugfs_create_u32("cur", 0400, dir, &ent->available_mrs); debugfs_create_u32("miss", 0600, dir, &ent->miss); } } From b91e1751fbcee7692e45308e74d8816c43802ede Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Mar 2020 10:22:32 +0200 Subject: [PATCH 10/16] RDMA/mlx5: Simplify how the MR cache bucket is located There are many bad APIs here that are accepting a cache bucket index instead of a bucket pointer. 
Many of the callers already have a bucket pointer, so this results in a lot of confusing uses of order2idx(). Pass the struct mlx5_cache_ent into add_keys(), remove_keys(), and alloc_cached_mr(). Once the MR is in the cache, store the cache bucket pointer directly in the MR, replacing the 'bool allocated_from_cache'. In the end there is only one place that needs to form an index from an order, alloc_mr_from_cache(). Increase the safety of this function by disallowing it from accessing cache entries in the ODP special area. Link: https://lore.kernel.org/r/20200310082238.239865-7-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 +- drivers/infiniband/hw/mlx5/mr.c | 160 +++++++++++---------------- drivers/infiniband/hw/mlx5/odp.c | 2 +- 3 files changed, 71 insertions(+), 98 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 731b0f7bbe5b..7208946d2787 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -617,8 +617,8 @@ struct mlx5_ib_mr { struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; - int order; - bool allocated_from_cache; + unsigned int order; + struct mlx5_cache_ent *cache_ent; int npages; struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; @@ -1274,7 +1274,8 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry); +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, + unsigned int entry); void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9f5afa24896d..55e31f6effda 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -99,16 +99,6 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -static int order2idx(struct mlx5_ib_dev *dev, int order) -{ - struct mlx5_mr_cache *cache = &dev->cache; - - if (order < cache->ent[0].order) - return 0; - else - return order - cache->ent[0].order; -} - static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) { return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= @@ -120,9 +110,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) struct mlx5_ib_mr *mr = container_of(context, struct mlx5_ib_mr, cb_work); struct mlx5_ib_dev *dev = mr->dev; - struct mlx5_mr_cache *cache = &dev->cache; - int c = order2idx(dev, mr->order); - struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_cache_ent *ent = mr->cache_ent; unsigned long flags; spin_lock_irqsave(&ent->lock, flags); @@ -140,7 +128,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) mr->mmkey.key |= mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, mr->out, mkey_index)); - cache->last_add = jiffies; + dev->cache.last_add = jiffies; spin_lock_irqsave(&ent->lock, flags); list_add_tail(&mr->list, &ent->head); @@ -152,10 +140,8 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) complete(&ent->compl); } -static int add_keys(struct mlx5_ib_dev *dev, int c, int num) +static int add_keys(struct mlx5_cache_ent *ent, int num) { - struct mlx5_mr_cache
*cache = &dev->cache; - struct mlx5_cache_ent *ent = &cache->ent[c]; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; void *mkc; @@ -180,8 +166,8 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr->order = ent->order; - mr->allocated_from_cache = true; - mr->dev = dev; + mr->cache_ent = ent; + mr->dev = ent->dev; MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); @@ -196,15 +182,15 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_ib_create_mkey_cb(dev, &mr->mmkey, - &dev->async_ctx, in, inlen, - mr->out, sizeof(mr->out), - &mr->cb_work); + err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey, + &ent->dev->async_ctx, in, inlen, + mr->out, sizeof(mr->out), + &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; spin_unlock_irq(&ent->lock); - mlx5_ib_warn(dev, "create mkey failed %d\n", err); + mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); kfree(mr); break; } @@ -214,10 +200,8 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) return err; } -static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) +static void remove_keys(struct mlx5_cache_ent *ent, int num) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; LIST_HEAD(del_list); @@ -234,7 +218,7 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) ent->available_mrs--; ent->total_mrs--; spin_unlock_irq(&ent->lock); - mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); } list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -247,18 +231,14 @@ static ssize_t size_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct mlx5_cache_ent *ent = filp->private_data; - struct mlx5_ib_dev *dev = ent->dev; char lbuf[20] = {0}; u32 var; int err; - int c; count = min(count, sizeof(lbuf) - 1); if (copy_from_user(lbuf, buf, count)) return -EFAULT; - c = order2idx(dev, ent->order); - if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -267,14 +247,14 @@ static ssize_t size_write(struct file *filp, const char __user *buf, if (var > ent->total_mrs) { do { - err = add_keys(dev, c, var - ent->total_mrs); + err = add_keys(ent, var - ent->total_mrs); if (err && err != -EAGAIN) return err; usleep_range(3000, 5000); } while (err); } else if (var < ent->total_mrs) { - remove_keys(dev, c, ent->total_mrs - var); + remove_keys(ent, ent->total_mrs - var); } return count; @@ -305,18 +285,14 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct mlx5_cache_ent *ent = filp->private_data; - struct mlx5_ib_dev *dev = ent->dev; char lbuf[20] = {0}; u32 var; int err; - int c; count = min(count, sizeof(lbuf) - 1); if (copy_from_user(lbuf, buf, count)) return -EFAULT; - c = order2idx(dev, ent->order); - if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -326,7 +302,7 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, ent->limit = var; if (ent->available_mrs < ent->limit) { - err = add_keys(dev, c, 2 * ent->limit - ent->available_mrs); + err = add_keys(ent, 2 * ent->limit - ent->available_mrs); if (err) return err; } @@ -371,24 +347,22 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) { struct mlx5_ib_dev *dev = ent->dev; struct mlx5_mr_cache *cache = &dev->cache; - int i = order2idx(dev, ent->order); int err; 
if (cache->stopped) return; - ent = &dev->cache.ent[i]; if (ent->available_mrs < 2 * ent->limit && !dev->fill_delay) { - err = add_keys(dev, i, 1); + err = add_keys(ent, 1); if (ent->available_mrs < 2 * ent->limit) { if (err == -EAGAIN) { mlx5_ib_dbg(dev, "returned eagain, order %d\n", - i + 2); + ent->order); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(3)); } else if (err) { mlx5_ib_warn(dev, "command failed order %d, err %d\n", - i + 2, err); + ent->order, err); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(1000)); } else { @@ -410,7 +384,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) */ if (!need_resched() && !someone_adding(cache) && time_after(jiffies, cache->last_add + 300 * HZ)) { - remove_keys(dev, i, 1); + remove_keys(ent, 1); if (ent->available_mrs > ent->limit) queue_work(cache->wq, &ent->work); } else { @@ -435,17 +409,18 @@ static void cache_work_func(struct work_struct *work) __cache_work_func(ent); } -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) +/* Allocate a special entry from the cache */ +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, + unsigned int entry) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; int err; - if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { - mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); + if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY || + entry >= ARRAY_SIZE(cache->ent))) return ERR_PTR(-EINVAL); - } ent = &cache->ent[entry]; while (1) { @@ -453,7 +428,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) if (list_empty(&ent->head)) { spin_unlock_irq(&ent->lock); - err = add_keys(dev, entry, 1); + err = add_keys(ent, 1); if (err && err != -EAGAIN) return ERR_PTR(err); @@ -471,26 +446,16 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) } } -static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) +static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent) { - struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_ib_dev *dev = req_ent->dev; struct mlx5_ib_mr *mr = NULL; - struct mlx5_cache_ent *ent; - int last_umr_cache_entry; - int c; - int i; + struct mlx5_cache_ent *ent = req_ent; - c = order2idx(dev, order); - last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev)); - if (c < 0 || c > last_umr_cache_entry) { - mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); - return NULL; - } - - for (i = c; i <= last_umr_cache_entry; i++) { - ent = &cache->ent[i]; - - mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); + /* Try larger MR pools from the cache to satisfy the allocation */ + for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) { + mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order, + ent - dev->cache.ent); spin_lock_irq(&ent->lock); if (!list_empty(&ent->head)) { @@ -500,43 +465,36 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) ent->available_mrs--; spin_unlock_irq(&ent->lock); if (ent->available_mrs < ent->limit) - queue_work(cache->wq, &ent->work); + queue_work(dev->cache.wq, &ent->work); break; } spin_unlock_irq(&ent->lock); - queue_work(cache->wq, &ent->work); + queue_work(dev->cache.wq, &ent->work); } if (!mr) - cache->ent[c].miss++; + req_ent->miss++; return mr; } void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; + 
struct mlx5_cache_ent *ent = mr->cache_ent; int shrink = 0; - int c; - if (!mr->allocated_from_cache) + if (!ent) return; - c = order2idx(dev, mr->order); - WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES); - if (mlx5_mr_cache_invalidate(mr)) { - mr->allocated_from_cache = false; + mr->cache_ent = NULL; destroy_mkey(dev, mr); - ent = &cache->ent[c]; if (ent->available_mrs < ent->limit) - queue_work(cache->wq, &ent->work); + queue_work(dev->cache.wq, &ent->work); return; } - ent = &cache->ent[c]; spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); ent->available_mrs++; @@ -545,7 +503,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) spin_unlock_irq(&ent->lock); if (shrink) - queue_work(cache->wq, &ent->work); + queue_work(dev->cache.wq, &ent->work); } static void clean_keys(struct mlx5_ib_dev *dev, int c) @@ -872,22 +830,38 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, return err; } -static struct mlx5_ib_mr *alloc_mr_from_cache( - struct ib_pd *pd, struct ib_umem *umem, - u64 virt_addr, u64 len, int npages, - int page_shift, int order, int access_flags) +static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, + unsigned int order) +{ + struct mlx5_mr_cache *cache = &dev->cache; + + if (order < cache->ent[0].order) + return &cache->ent[0]; + order = order - cache->ent[0].order; + if (order > MR_CACHE_LAST_STD_ENTRY) + return NULL; + return &cache->ent[order]; +} + +static struct mlx5_ib_mr * +alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, + u64 len, int npages, int page_shift, unsigned int order, + int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); struct mlx5_ib_mr *mr; int err = 0; int i; + if (!ent) + return ERR_PTR(-E2BIG); for (i = 0; i < 1; i++) { - mr = alloc_cached_mr(dev, order); + mr = alloc_cached_mr(ent); if (mr) break; - err = add_keys(dev, order2idx(dev, order), 1); + err = add_keys(ent, 1); if (err && err != -EAGAIN) { mlx5_ib_warn(dev, "add_keys failed, err %d\n", err); break; @@ -1470,7 +1444,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, /* * UMR can't be used - MKey needs to be replaced. 
*/ - if (mr->allocated_from_cache) + if (mr->cache_ent) err = mlx5_mr_cache_invalidate(mr); else err = destroy_mkey(dev, mr); @@ -1486,7 +1460,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - mr->allocated_from_cache = false; + mr->cache_ent = NULL; } else { /* * Send a UMR WQE @@ -1573,8 +1547,6 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int allocated_from_cache = mr->allocated_from_cache; - if (mr->sig) { if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) @@ -1589,7 +1561,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mr->sig = NULL; } - if (!allocated_from_cache) { + if (!mr->cache_ent) { destroy_mkey(dev, mr); mlx5_free_priv_descs(mr); } @@ -1606,7 +1578,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) else clean_mr(dev, mr); - if (mr->allocated_from_cache) + if (mr->cache_ent) mlx5_mr_cache_free(dev, mr); else kfree(mr); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4216814ba871..224f480fc441 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -197,7 +197,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) odp->private = NULL; mutex_unlock(&odp->umem_mutex); - if (!mr->allocated_from_cache) { + if (!mr->cache_ent) { mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); WARN_ON(mr->descs); } From 1769c4c575489be28891c98f1e3f0a4252ca750a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Mar 2020 10:22:33 +0200 Subject: [PATCH 11/16] RDMA/mlx5: Always remove MRs from the cache before destroying them The cache bucket tracks the total number of MRs that exist, both inside and outside of the cache. Removing an MR from the cache (by setting cache_ent to NULL) without updating total_mrs will cause the tracking to leak and be inflated. Further fix the rereg_mr path to always destroy the MR. reg_create will always overwrite all the MR data in mlx5_ib_mr, so the MR must be completely destroyed, in all cases, before this function can be called. Detach the MR from the cache and unconditionally destroy it to avoid leaking HW mkeys.
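Sketched, the accounting rule this enforces (mirroring the detach_mr_from_cache() helper added in the diff below, not a new API): any path that permanently takes an MR away from its bucket must drop total_mrs under the bucket lock before the mkey is destroyed, otherwise total_mrs keeps counting an mkey that no longer exists:

  /* Mirrors detach_mr_from_cache() in the diff below. */
  mr->cache_ent = NULL;           /* MR no longer belongs to the bucket */
  spin_lock_irq(&ent->lock);
  ent->total_mrs--;               /* keep the inside+outside count accurate */
  spin_unlock_irq(&ent->lock);
  /* ... only now destroy the mkey ... */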
Fixes: afd1417404fb ("IB/mlx5: Use direct mkey destroy command upon UMR unreg failure") Fixes: 56e11d628c5d ("IB/mlx5: Added support for re-registration of MRs") Link: https://lore.kernel.org/r/20200310082238.239865-8-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 55e31f6effda..9b980ef326b4 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -479,6 +479,16 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent) return mr; } +static void detach_mr_from_cache(struct mlx5_ib_mr *mr) +{ + struct mlx5_cache_ent *ent = mr->cache_ent; + + mr->cache_ent = NULL; + spin_lock_irq(&ent->lock); + ent->total_mrs--; + spin_unlock_irq(&ent->lock); +} + void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->cache_ent; @@ -488,7 +498,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return; if (mlx5_mr_cache_invalidate(mr)) { - mr->cache_ent = NULL; + detach_mr_from_cache(mr); destroy_mkey(dev, mr); if (ent->available_mrs < ent->limit) queue_work(dev->cache.wq, &ent->work); @@ -1445,9 +1455,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * UMR can't be used - MKey needs to be replaced. */ if (mr->cache_ent) - err = mlx5_mr_cache_invalidate(mr); - else - err = destroy_mkey(dev, mr); + detach_mr_from_cache(mr); + err = destroy_mkey(dev, mr); if (err) goto err; @@ -1459,8 +1468,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, mr = to_mmr(ib_mr); goto err; } - - mr->cache_ent = NULL; } else { /* * Send a UMR WQE From a1d8854aae4ee19df6161a276a99d3c9c2abc4f3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Mar 2020 10:22:34 +0200 Subject: [PATCH 12/16] RDMA/mlx5: Fix MR cache size and limit debugfs The size_write function is supposed to adjust the total_mrs to match the user's request, but lacks locking and safety checking. total_mrs can only be adjusted by at most available_mrs. MRs already assigned to users cannot be revoked. Ensure that the user provides a target value within the range of available_mrs and within the high/low water mark. limit_write has confusing and wrong sanity checking, and doesn't have the ability to deallocate on limit reduction. Since both functions use the same algorithm to adjust the available_mrs, consolidate it into one function and write it correctly. Fix the locking by holding the spinlock for all accesses to ent->X. Always fail if the user provides a malformed string.
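The 'always fail' part comes from replacing the sscanf()-based parsing with kstrtou32_from_user(). A minimal sketch of that pattern for a debugfs write handler; the handler name and body are illustrative, only the parsing call matches the diff below:

  static ssize_t demo_write(struct file *filp, const char __user *buf,
                            size_t count, loff_t *pos)
  {
          u32 target;
          int err;

          /* Unlike sscanf("%u"), kstrtou32_from_user() rejects trailing
           * junk and out-of-range values instead of silently accepting a
           * partial parse.
           */
          err = kstrtou32_from_user(buf, count, 0, &target);
          if (err)
                  return err;
          /* ... validate and apply 'target' under the entry lock ... */
          return count;
  }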
Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Link: https://lore.kernel.org/r/20200310082238.239865-9-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 152 ++++++++++++++++++-------------- 1 file changed, 88 insertions(+), 64 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9b980ef326b4..091e24c58e2c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -140,7 +140,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) complete(&ent->compl); } -static int add_keys(struct mlx5_cache_ent *ent, int num) +static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) { int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; @@ -200,30 +200,54 @@ static int add_keys(struct mlx5_cache_ent *ent, int num) return err; } -static void remove_keys(struct mlx5_cache_ent *ent, int num) +static void remove_cache_mr(struct mlx5_cache_ent *ent) { - struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; - LIST_HEAD(del_list); - int i; - for (i = 0; i < num; i++) { - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { - spin_unlock_irq(&ent->lock); - break; - } - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_move(&mr->list, &del_list); - ent->available_mrs--; - ent->total_mrs--; + spin_lock_irq(&ent->lock); + if (list_empty(&ent->head)) { spin_unlock_irq(&ent->lock); - mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); + return; } + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->available_mrs--; + ent->total_mrs--; + spin_unlock_irq(&ent->lock); + mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); + kfree(mr); +} - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { - list_del(&mr->list); - kfree(mr); +static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, + bool limit_fill) +{ + int err; + + lockdep_assert_held(&ent->lock); + + while (true) { + if (limit_fill) + target = ent->limit * 2; + if (target == ent->available_mrs + ent->pending) + return 0; + if (target > ent->available_mrs + ent->pending) { + u32 todo = target - (ent->available_mrs + ent->pending); + + spin_unlock_irq(&ent->lock); + err = add_keys(ent, todo); + if (err == -EAGAIN) + usleep_range(3000, 5000); + spin_lock_irq(&ent->lock); + if (err) { + if (err != -EAGAIN) + return err; + } else + return 0; + } else { + spin_unlock_irq(&ent->lock); + remove_cache_mr(ent); + spin_lock_irq(&ent->lock); + } } } @@ -231,33 +255,38 @@ static ssize_t size_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct mlx5_cache_ent *ent = filp->private_data; - char lbuf[20] = {0}; - u32 var; + u32 target; int err; - count = min(count, sizeof(lbuf) - 1); - if (copy_from_user(lbuf, buf, count)) - return -EFAULT; + err = kstrtou32_from_user(buf, count, 0, &target); + if (err) + return err; - if (sscanf(lbuf, "%u", &var) != 1) - return -EINVAL; - - if (var < ent->limit) - return -EINVAL; - - if (var > ent->total_mrs) { - do { - err = add_keys(ent, var - ent->total_mrs); - if (err && err != -EAGAIN) - return err; - - usleep_range(3000, 5000); - } while (err); - } else if (var < ent->total_mrs) { - remove_keys(ent, ent->total_mrs - var); + /* + * Target is the new value of total_mrs the user requests, however we + * cannot free MRs that are in use. Compute the target value for + * available_mrs. 
+ */ + spin_lock_irq(&ent->lock); + if (target < ent->total_mrs - ent->available_mrs) { + err = -EINVAL; + goto err_unlock; } + target = target - (ent->total_mrs - ent->available_mrs); + if (target < ent->limit || target > ent->limit*2) { + err = -EINVAL; + goto err_unlock; + } + err = resize_available_mrs(ent, target, false); + if (err) + goto err_unlock; + spin_unlock_irq(&ent->lock); return count; + +err_unlock: + spin_unlock_irq(&ent->lock); + return err; } static ssize_t size_read(struct file *filp, char __user *buf, size_t count, @@ -285,28 +314,23 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct mlx5_cache_ent *ent = filp->private_data; - char lbuf[20] = {0}; u32 var; int err; - count = min(count, sizeof(lbuf) - 1); - if (copy_from_user(lbuf, buf, count)) - return -EFAULT; - - if (sscanf(lbuf, "%u", &var) != 1) - return -EINVAL; - - if (var > ent->total_mrs) - return -EINVAL; + err = kstrtou32_from_user(buf, count, 0, &var); + if (err) + return err; + /* + * Upon set we immediately fill the cache to high water mark implied by + * the limit. + */ + spin_lock_irq(&ent->lock); ent->limit = var; - - if (ent->available_mrs < ent->limit) { - err = add_keys(ent, 2 * ent->limit - ent->available_mrs); - if (err) - return err; - } - + err = resize_available_mrs(ent, 0, true); + spin_unlock_irq(&ent->lock); + if (err) + return err; return count; } @@ -371,20 +395,20 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) } } else if (ent->available_mrs > 2 * ent->limit) { /* - * The remove_keys() logic is performed as garbage collection - * task. Such task is intended to be run when no other active - * processes are running. + * The remove_cache_mr() logic is performed as garbage + * collection task. Such task is intended to be run when no + * other active processes are running. * * The need_resched() will return TRUE if there are user tasks * to be activated in near future. * - * In such case, we don't execute remove_keys() and postpone - * the garbage collection work to try to run in next cycle, - * in order to free CPU resources to other tasks. + * In such case, we don't execute remove_cache_mr() and postpone + * the garbage collection work to try to run in next cycle, in + * order to free CPU resources to other tasks. */ if (!need_resched() && !someone_adding(cache) && time_after(jiffies, cache->last_add + 300 * HZ)) { - remove_keys(ent, 1); + remove_cache_mr(ent); if (ent->available_mrs > ent->limit) queue_work(cache->wq, &ent->work); } else { From ad2d3ef46d2a88f2906d8d0cc6b912199ec3f1d6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Mar 2020 10:22:35 +0200 Subject: [PATCH 13/16] RDMA/mlx5: Lock access to ent->available_mrs/limit when doing queue_work Accesses to these members need to be locked. There is no reason not to hold a spinlock while calling queue_work(), so move the tests into a helper and always call it under lock. The helper should be called when available_mrs is adjusted.
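The resulting pattern at every site that changes available_mrs is sketched below (drawn from the shape of the hunks in this patch); the hysteresis means work is queued only when available_mrs drops below limit or grows beyond 2 * limit, so refills and shrinks do not bounce:

  spin_lock_irq(&ent->lock);
  list_del(&mr->list);
  ent->available_mrs--;
  queue_adjust_cache_locked(ent);  /* water-mark test done under the lock */
  spin_unlock_irq(&ent->lock);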
Link: https://lore.kernel.org/r/20200310082238.239865-10-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 40 ++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 091e24c58e2c..b46039d86b98 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -134,6 +134,10 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) list_add_tail(&mr->list, &ent->head); ent->available_mrs++; ent->total_mrs++; + /* + * Creating is always done in response to some demand, so do not call + * queue_adjust_cache_locked(). + */ spin_unlock_irqrestore(&ent->lock, flags); if (!completion_done(&ent->compl)) @@ -367,6 +371,20 @@ static int someone_adding(struct mlx5_mr_cache *cache) return 0; } +/* + * Check if the bucket is outside the high/low water mark and schedule an async + * update. The cache refill has hysteresis, once the low water mark is hit it is + * refilled up to the high mark. + */ +static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) +{ + lockdep_assert_held(&ent->lock); + + if (ent->available_mrs < ent->limit || + ent->available_mrs > 2 * ent->limit) + queue_work(ent->dev->cache.wq, &ent->work); +} + static void __cache_work_func(struct mlx5_cache_ent *ent) { struct mlx5_ib_dev *dev = ent->dev; @@ -462,9 +480,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, list); list_del(&mr->list); ent->available_mrs--; + queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); - if (ent->available_mrs < ent->limit) - queue_work(cache->wq, &ent->work); return mr; } } @@ -487,14 +504,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent) list); list_del(&mr->list); ent->available_mrs--; + queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); - if (ent->available_mrs < ent->limit) - queue_work(dev->cache.wq, &ent->work); break; } + queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); - - queue_work(dev->cache.wq, &ent->work); } if (!mr) @@ -516,7 +531,6 @@ static void detach_mr_from_cache(struct mlx5_ib_mr *mr) void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->cache_ent; - int shrink = 0; if (!ent) return; @@ -524,20 +538,14 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) if (mlx5_mr_cache_invalidate(mr)) { detach_mr_from_cache(mr); destroy_mkey(dev, mr); - if (ent->available_mrs < ent->limit) - queue_work(dev->cache.wq, &ent->work); return; } spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); ent->available_mrs++; - if (ent->available_mrs > 2 * ent->limit) - shrink = 1; + queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); - - if (shrink) - queue_work(dev->cache.wq, &ent->work); } static void clean_keys(struct mlx5_ib_dev *dev, int c) @@ -653,7 +661,9 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->limit = dev->mdev->profile->mr_cache[i].limit; else ent->limit = 0; - queue_work(cache->wq, &ent->work); + spin_lock_irq(&ent->lock); + queue_adjust_cache_locked(ent); + spin_unlock_irq(&ent->lock); } mlx5_mr_cache_debugfs_init(dev); From b9358bdbc713cb64b8701bcbd450edc155d761a1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Mar 2020 10:22:36 +0200 Subject: [PATCH 14/16] RDMA/mlx5: Fix locking in MR cache work queue All of the members of mlx5_cache_ent must be accessed while holding the 
spinlock, so add the missing lock acquisition in __cache_work_func(). Using cache->stopped and flush_workqueue() is an inherently racy way to shut down self-scheduling work on a queue. Replace it with ent->disabled under lock, and always check disabled before queuing any new work. Use cancel_work_sync() to shut down the queue. Use READ_ONCE/WRITE_ONCE for dev->last_add to manage concurrency, as strict coherency is less important here. Split fill_delay out of the bitfield; C bitfield updates are not atomic, so concurrent updates can corrupt neighbouring bits. Use READ_ONCE/WRITE_ONCE, but this could also use test_bit()/set_bit(). (An illustrative sketch of this shutdown scheme follows the description of patch 15 below.) Link: https://lore.kernel.org/r/20200310082238.239865-11-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +- drivers/infiniband/hw/mlx5/mr.c | 121 +++++++++++++++++---------- 2 files changed, 80 insertions(+), 46 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7208946d2787..a22932ffb9c8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -699,6 +699,8 @@ struct mlx5_cache_ent { u32 access_mode; u32 page; + u8 disabled:1; + /* * - available_mrs is the length of list head, ie the number of MRs * available for immediate allocation. @@ -725,7 +727,6 @@ struct mlx5_cache_ent { struct mlx5_mr_cache { struct workqueue_struct *wq; struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES]; - int stopped; struct dentry *root; unsigned long last_add; }; @@ -995,10 +996,10 @@ struct mlx5_ib_dev { */ struct mutex cap_mask_mutex; u8 ib_active:1; - u8 fill_delay:1; u8 is_rep:1; u8 lag_active:1; u8 wc_support:1; + u8 fill_delay; struct umr_common umrc; /* sync used page count stats */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b46039d86b98..424ce3de3865 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -113,13 +113,13 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) struct mlx5_cache_ent *ent = mr->cache_ent; unsigned long flags; - spin_lock_irqsave(&ent->lock, flags); - ent->pending--; - spin_unlock_irqrestore(&ent->lock, flags); if (status) { mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); kfree(mr); - dev->fill_delay = 1; + spin_lock_irqsave(&ent->lock, flags); + ent->pending--; + WRITE_ONCE(dev->fill_delay, 1); + spin_unlock_irqrestore(&ent->lock, flags); mod_timer(&dev->delay_timer, jiffies + HZ); return; } @@ -128,12 +128,13 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) mr->mmkey.key |= mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, mr->out, mkey_index)); - dev->cache.last_add = jiffies; + WRITE_ONCE(dev->cache.last_add, jiffies); spin_lock_irqsave(&ent->lock, flags); list_add_tail(&mr->list, &ent->head); ent->available_mrs++; ent->total_mrs++; + ent->pending--; /* * Creating is always done in response to some demand, so do not call * queue_adjust_cache_locked().
@@ -159,11 +160,6 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); for (i = 0; i < num; i++) { - if (ent->pending >= MAX_PENDING_REG_MR) { - err = -EAGAIN; - break; - } - mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { err = -ENOMEM; @@ -184,6 +180,12 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) MLX5_SET(mkc, mkc, log_page_size, ent->page); spin_lock_irq(&ent->lock); + if (ent->pending >= MAX_PENDING_REG_MR) { + err = -EAGAIN; + spin_unlock_irq(&ent->lock); + kfree(mr); + break; + } ent->pending++; spin_unlock_irq(&ent->lock); err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey, @@ -204,15 +206,13 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) return err; } -static void remove_cache_mr(struct mlx5_cache_ent *ent) +static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) { struct mlx5_ib_mr *mr; - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { - spin_unlock_irq(&ent->lock); + lockdep_assert_held(&ent->lock); + if (list_empty(&ent->head)) return; - } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); ent->available_mrs--; @@ -220,6 +220,7 @@ static void remove_cache_mr(struct mlx5_cache_ent *ent) spin_unlock_irq(&ent->lock); mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); kfree(mr); + spin_lock_irq(&ent->lock); } static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, @@ -248,9 +249,7 @@ static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, } else return 0; } else { - spin_unlock_irq(&ent->lock); - remove_cache_mr(ent); - spin_lock_irq(&ent->lock); + remove_cache_mr_locked(ent); } } } @@ -359,16 +358,21 @@ static const struct file_operations limit_fops = { .read = limit_read, }; -static int someone_adding(struct mlx5_mr_cache *cache) +static bool someone_adding(struct mlx5_mr_cache *cache) { - int i; + unsigned int i; for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - if (cache->ent[i].available_mrs < cache->ent[i].limit) - return 1; - } + struct mlx5_cache_ent *ent = &cache->ent[i]; + bool ret; - return 0; + spin_lock_irq(&ent->lock); + ret = ent->available_mrs < ent->limit; + spin_unlock_irq(&ent->lock); + if (ret) + return true; + } + return false; } /* @@ -380,6 +384,8 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) { lockdep_assert_held(&ent->lock); + if (ent->disabled) + return; if (ent->available_mrs < ent->limit || ent->available_mrs > 2 * ent->limit) queue_work(ent->dev->cache.wq, &ent->work); @@ -391,27 +397,42 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) struct mlx5_mr_cache *cache = &dev->cache; int err; - if (cache->stopped) - return; + spin_lock_irq(&ent->lock); + if (ent->disabled) + goto out; - if (ent->available_mrs < 2 * ent->limit && !dev->fill_delay) { + if (ent->available_mrs + ent->pending < 2 * ent->limit && + !READ_ONCE(dev->fill_delay)) { + spin_unlock_irq(&ent->lock); err = add_keys(ent, 1); - if (ent->available_mrs < 2 * ent->limit) { + + spin_lock_irq(&ent->lock); + if (ent->disabled) + goto out; + if (err) { if (err == -EAGAIN) { mlx5_ib_dbg(dev, "returned eagain, order %d\n", ent->order); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(3)); - } else if (err) { - mlx5_ib_warn(dev, "command failed order %d, err %d\n", - ent->order, err); + } else { + mlx5_ib_warn( + dev, + "command failed order %d, err %d\n", + ent->order, err); queue_delayed_work(cache->wq, &ent->dwork, 
msecs_to_jiffies(1000)); - } else { - queue_work(cache->wq, &ent->work); } } + /* + * Once we start populating due to hitting a low water mark + * continue until we pass the high water mark. + */ + if (ent->available_mrs + ent->pending < 2 * ent->limit) + queue_work(cache->wq, &ent->work); } else if (ent->available_mrs > 2 * ent->limit) { + bool need_delay; + /* * The remove_cache_mr() logic is performed as garbage * collection task. Such task is intended to be run when no @@ -424,15 +445,20 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) * the garbage collection work to try to run in next cycle, in * order to free CPU resources to other tasks. */ - if (!need_resched() && !someone_adding(cache) && - time_after(jiffies, cache->last_add + 300 * HZ)) { - remove_cache_mr(ent); - if (ent->available_mrs > ent->limit) - queue_work(cache->wq, &ent->work); - } else { + spin_unlock_irq(&ent->lock); + need_delay = need_resched() || someone_adding(cache) || + time_after(jiffies, + READ_ONCE(cache->last_add) + 300 * HZ); + spin_lock_irq(&ent->lock); + if (ent->disabled) + goto out; + if (need_delay) queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); - } + remove_cache_mr_locked(ent); + queue_adjust_cache_locked(ent); } +out: + spin_unlock_irq(&ent->lock); } static void delayed_cache_work_func(struct work_struct *work) @@ -613,7 +639,7 @@ static void delay_time_func(struct timer_list *t) { struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer); - dev->fill_delay = 0; + WRITE_ONCE(dev->fill_delay, 0); } int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) @@ -673,13 +699,20 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { - int i; + unsigned int i; if (!dev->cache.wq) return 0; - dev->cache.stopped = 1; - flush_workqueue(dev->cache.wq); + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + struct mlx5_cache_ent *ent = &dev->cache.ent[i]; + + spin_lock_irq(&ent->lock); + ent->disabled = true; + spin_unlock_irq(&ent->lock); + cancel_work_sync(&ent->work); + cancel_delayed_work_sync(&ent->dwork); + } mlx5_mr_cache_debugfs_cleanup(dev); mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); From 1c78a21a0c6f8df98a2281c1c0232f9c85e1c44d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Mar 2020 10:22:37 +0200 Subject: [PATCH 15/16] RDMA/mlx5: Revise how the hysteresis scheme works for cache filling Currently, if the work queue is running, it is in 'hysteresis' mode and will fill until the cache reaches the high water mark. This implicit state is very tricky and interacts poorly with pending. Instead of self re-scheduling the work queue after add_keys() has started to create the new MR, have the queue scheduled from reg_mr_callback() only after the requested MR has been added. This avoids the bad design of an in-rush of queued work doing back-to-back add_keys() calls until EAGAIN, then sleeping. The add_keys() calls will be paced one at a time as they complete, slowly filling up the cache. Also, fix pending so that it is manipulated only under lock.
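[Editor's illustration, not part of the patch] The fill_to_high_water hysteresis described above can be modeled as a small state machine. The standalone C sketch below mirrors the decision order of queue_adjust_cache_locked() as reworked by this patch, under simplifying assumptions: no locking, and queueing reduced to a returned string. The struct and field names (limit, available_mrs, pending, fill_to_high_water) echo the driver; everything else is hypothetical.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for struct mlx5_cache_ent; not driver code. */
struct bucket_model {
	unsigned int limit;          /* low water mark */
	unsigned int available_mrs;  /* MRs ready for immediate allocation */
	unsigned int pending;        /* async creations still in flight */
	bool fill_to_high_water;     /* the formerly implicit state, made explicit */
};

/* Mirrors the decision order of queue_adjust_cache_locked(). */
static const char *adjust(struct bucket_model *b)
{
	if (b->available_mrs < b->limit) {
		b->fill_to_high_water = true;
		return "queue work (below low water mark)";
	}
	if (b->fill_to_high_water &&
	    b->available_mrs + b->pending < 2 * b->limit)
		return "queue work (still filling to high water mark)";
	if (b->available_mrs == 2 * b->limit) {
		b->fill_to_high_water = false;
		return "idle (reached high water mark)";
	}
	if (b->available_mrs > 2 * b->limit) {
		b->fill_to_high_water = false;
		return b->pending ? "delayed work (shrink once pending drains)"
				  : "queue work (shrink now)";
	}
	return "idle (inside hysteresis band)";
}

int main(void)
{
	struct bucket_model b = { .limit = 4, .available_mrs = 3 };

	printf("%s\n", adjust(&b)); /* below low mark: latch and fill */
	b.available_mrs = 5;
	printf("%s\n", adjust(&b)); /* above low mark but latched: keep filling */
	b.available_mrs = 8;
	printf("%s\n", adjust(&b)); /* 2 * limit reached: latch clears, go idle */
	return 0;
}

The latch is visible in the second call: 5 is above the low water mark, yet filling continues because the bucket dipped below it earlier.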
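[Editor's illustration, not part of the patch] Likewise, the shutdown rework in patch 14 above (latch ent->disabled under the lock, then reap queued work with cancel_work_sync()) can be sketched with userspace primitives. This is a hedged model only: a pthread mutex stands in for the ent spinlock, the work queue is reduced to a counter, and the cancel step is a comment.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for one cache bucket; not driver code. */
struct shutdown_model {
	pthread_mutex_t lock;
	bool disabled;	/* replaces the racy cache-wide 'stopped' flag */
	int queued;	/* stands in for work queued on the bucket */
};

/* Every queuing attempt checks 'disabled' under the lock, so no new work
 * can slip in once shutdown has begun. */
static bool queue_work_model(struct shutdown_model *m)
{
	bool ok;

	pthread_mutex_lock(&m->lock);
	ok = !m->disabled;
	if (ok)
		m->queued++;
	pthread_mutex_unlock(&m->lock);
	return ok;
}

static void do_shutdown(struct shutdown_model *m)
{
	pthread_mutex_lock(&m->lock);
	m->disabled = true;
	pthread_mutex_unlock(&m->lock);
	/* Here the driver calls cancel_work_sync() and
	 * cancel_delayed_work_sync() to reap anything queued before
	 * 'disabled' was latched. */
}

int main(void)
{
	struct shutdown_model m = { .lock = PTHREAD_MUTEX_INITIALIZER };

	printf("queue accepted before shutdown: %d\n", queue_work_model(&m));
	do_shutdown(&m);
	printf("queue accepted after shutdown: %d\n", queue_work_model(&m));
	return 0;
}

The ordering is the point: because the flag is latched under the same lock that guards queuing, a sync cancel afterwards cannot race with a late requeue, which is exactly what the old cache->stopped plus flush_workqueue() pairing could not guarantee.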
Link: https://lore.kernel.org/r/20200310082238.239865-12-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/mr.c | 41 ++++++++++++++++++---------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a22932ffb9c8..1216575292a7 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -700,6 +700,7 @@ struct mlx5_cache_ent { u32 page; u8 disabled:1; + u8 fill_to_high_water:1; /* * - available_mrs is the length of list head, ie the number of MRs diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 424ce3de3865..afacaf8981fa 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -86,6 +86,7 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); +static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) { @@ -134,11 +135,9 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) list_add_tail(&mr->list, &ent->head); ent->available_mrs++; ent->total_mrs++; + /* If we are doing fill_to_high_water then keep going. */ + queue_adjust_cache_locked(ent); ent->pending--; - /* - * Creating is always done in response to some demand, so do not call - * queue_adjust_cache_locked(). - */ spin_unlock_irqrestore(&ent->lock, flags); if (!completion_done(&ent->compl)) @@ -384,11 +383,29 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) { lockdep_assert_held(&ent->lock); - if (ent->disabled) + if (ent->disabled || READ_ONCE(ent->dev->fill_delay)) return; - if (ent->available_mrs < ent->limit || - ent->available_mrs > 2 * ent->limit) + if (ent->available_mrs < ent->limit) { + ent->fill_to_high_water = true; queue_work(ent->dev->cache.wq, &ent->work); + } else if (ent->fill_to_high_water && + ent->available_mrs + ent->pending < 2 * ent->limit) { + /* + * Once we start populating due to hitting a low water mark + * continue until we pass the high water mark. + */ + queue_work(ent->dev->cache.wq, &ent->work); + } else if (ent->available_mrs == 2 * ent->limit) { + ent->fill_to_high_water = false; + } else if (ent->available_mrs > 2 * ent->limit) { + /* Queue deletion of excess entries */ + ent->fill_to_high_water = false; + if (ent->pending) + queue_delayed_work(ent->dev->cache.wq, &ent->dwork, + msecs_to_jiffies(1000)); + else + queue_work(ent->dev->cache.wq, &ent->work); + } } static void __cache_work_func(struct mlx5_cache_ent *ent) @@ -401,11 +418,11 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) if (ent->disabled) goto out; - if (ent->available_mrs + ent->pending < 2 * ent->limit && + if (ent->fill_to_high_water && + ent->available_mrs + ent->pending < 2 * ent->limit && !READ_ONCE(dev->fill_delay)) { spin_unlock_irq(&ent->lock); err = add_keys(ent, 1); - spin_lock_irq(&ent->lock); if (ent->disabled) goto out; @@ -424,12 +441,6 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) msecs_to_jiffies(1000)); } } - /* - * Once we start populating due to hitting a low water mark - * continue until we pass the high water mark. 
- */ - if (ent->available_mrs + ent->pending < 2 * ent->limit) - queue_work(cache->wq, &ent->work); } else if (ent->available_mrs > 2 * ent->limit) { bool need_delay; From aad719dcf379f1413dcb168413a53fea66e2ef90 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Mar 2020 10:22:38 +0200 Subject: [PATCH 16/16] RDMA/mlx5: Allow MRs to be created in the cache synchronously If the cache is completely out of MRs, and we are running in cache mode, then directly and synchronously create an MR that is compatible with the cache bucket, using a sleeping mailbox command. This ensures that the thread waiting for the MR is guaranteed to get one. When an MR allocated in this way is freed, it is compatible with the cache bucket and will be recycled back into it. Delete the very buggy ent->compl scheme for synchronous MR allocation. (An illustrative sketch of this fallback follows at the end of the series.) Link: https://lore.kernel.org/r/20200310082238.239865-13-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - drivers/infiniband/hw/mlx5/mr.c | 149 ++++++++++++++++----------- 2 files changed, 88 insertions(+), 62 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 1216575292a7..a5da2d5cf659 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -722,7 +722,6 @@ struct mlx5_cache_ent { struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; - struct completion compl; }; struct mlx5_mr_cache { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index afacaf8981fa..a401931189b7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -139,14 +139,34 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) queue_adjust_cache_locked(ent); ent->pending--; spin_unlock_irqrestore(&ent->lock, flags); - - if (!completion_done(&ent->compl)) - complete(&ent->compl); } +static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) +{ + struct mlx5_ib_mr *mr; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return NULL; + mr->order = ent->order; + mr->cache_ent = ent; + mr->dev = ent->dev; + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); + + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); + MLX5_SET(mkc, mkc, log_page_size, ent->page); + return mr; +} + +/* Asynchronously schedule new MRs to be populated in the cache.
*/ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) { - int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -159,25 +179,11 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); for (i = 0; i < num; i++) { - mr = kzalloc(sizeof(*mr), GFP_KERNEL); + mr = alloc_cache_mr(ent, mkc); if (!mr) { err = -ENOMEM; break; } - mr->order = ent->order; - mr->cache_ent = ent; - mr->dev = ent->dev; - - MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, - (ent->access_mode >> 2) & 0x7); - - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); - MLX5_SET(mkc, mkc, log_page_size, ent->page); - spin_lock_irq(&ent->lock); if (ent->pending >= MAX_PENDING_REG_MR) { err = -EAGAIN; @@ -205,6 +211,44 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) return err; } +/* Synchronously create a MR in the cache */ +static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent) +{ + size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + struct mlx5_ib_mr *mr; + void *mkc; + u32 *in; + int err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + mr = alloc_cache_mr(ent, mkc); + if (!mr) { + err = -ENOMEM; + goto free_in; + } + + err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen); + if (err) + goto free_mr; + + mr->mmkey.type = MLX5_MKEY_MR; + WRITE_ONCE(ent->dev->cache.last_add, jiffies); + spin_lock_irq(&ent->lock); + ent->total_mrs++; + spin_unlock_irq(&ent->lock); + kfree(in); + return mr; +free_mr: + kfree(mr); +free_in: + kfree(in); + return ERR_PTR(err); +} + static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) { struct mlx5_ib_mr *mr; @@ -427,12 +471,12 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) if (ent->disabled) goto out; if (err) { - if (err == -EAGAIN) { - mlx5_ib_dbg(dev, "returned eagain, order %d\n", - ent->order); - queue_delayed_work(cache->wq, &ent->dwork, - msecs_to_jiffies(3)); - } else { + /* + * EAGAIN only happens if pending is positive, so we + * will be rescheduled from reg_mr_callback(). The only + * failure path here is ENOMEM. 
+ */ + if (err != -EAGAIN) { mlx5_ib_warn( dev, "command failed order %d, err %d\n", @@ -495,36 +539,30 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - int err; if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY || entry >= ARRAY_SIZE(cache->ent))) return ERR_PTR(-EINVAL); ent = &cache->ent[entry]; - while (1) { - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { - spin_unlock_irq(&ent->lock); - - err = add_keys(ent, 1); - if (err && err != -EAGAIN) - return ERR_PTR(err); - - wait_for_completion(&ent->compl); - } else { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, - list); - list_del(&mr->list); - ent->available_mrs--; - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); + spin_lock_irq(&ent->lock); + if (list_empty(&ent->head)) { + spin_unlock_irq(&ent->lock); + mr = create_cache_mr(ent); + if (IS_ERR(mr)) return mr; - } + } else { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->available_mrs--; + queue_adjust_cache_locked(ent); + spin_unlock_irq(&ent->lock); } + return mr; } -static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent) +/* Return a MR already available in the cache */ +static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) { struct mlx5_ib_dev *dev = req_ent->dev; struct mlx5_ib_mr *mr = NULL; @@ -676,7 +714,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->dev = dev; ent->limit = 0; - init_completion(&ent->compl); INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); @@ -939,26 +976,16 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); struct mlx5_ib_mr *mr; - int err = 0; - int i; if (!ent) return ERR_PTR(-E2BIG); - for (i = 0; i < 1; i++) { - mr = alloc_cached_mr(ent); - if (mr) - break; - - err = add_keys(ent, 1); - if (err && err != -EAGAIN) { - mlx5_ib_warn(dev, "add_keys failed, err %d\n", err); - break; - } + mr = get_cache_mr(ent); + if (!mr) { + mr = create_cache_mr(ent); + if (IS_ERR(mr)) + return mr; } - if (!mr) - return ERR_PTR(-EAGAIN); - mr->ibmr.pd = pd; mr->umem = umem; mr->access_flags = access_flags;
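[Editor's illustration, not part of the patch] The allocation fallback that patch 16 describes (take an MR from the bucket when one is available, otherwise synchronously create a bucket-compatible one) can be modeled in standalone C. get_cache_mr() and create_cache_mr() below are simplified stand-ins for the driver functions of the same names; the bucket itself is reduced to a counter.

#include <stdio.h>
#include <stdlib.h>

struct mr_model { int from_cache; };

/* Pop an MR from the bucket's free list; NULL when the cache is empty. */
static struct mr_model *get_cache_mr(int *available)
{
	struct mr_model *m;

	if (!*available)
		return NULL;
	(*available)--;
	m = malloc(sizeof(*m));
	if (m)
		m->from_cache = 1;
	return m;
}

/* Stand-in for the sleeping mailbox command: it always yields a
 * bucket-compatible MR, so a later free can recycle it into the cache. */
static struct mr_model *create_cache_mr(void)
{
	struct mr_model *m = malloc(sizeof(*m));

	if (m)
		m->from_cache = 0;
	return m;
}

static struct mr_model *alloc_mr(int *available)
{
	struct mr_model *m = get_cache_mr(available);

	/* Cache exhausted: fall back to a guaranteed synchronous create. */
	if (!m)
		m = create_cache_mr();
	return m;
}

int main(void)
{
	int available = 1;
	struct mr_model *a = alloc_mr(&available); /* served from the cache */
	struct mr_model *b = alloc_mr(&available); /* cache empty: sync create */

	printf("a from_cache=%d, b from_cache=%d\n",
	       a ? a->from_cache : -1, b ? b->from_cache : -1);
	free(a);
	free(b);
	return 0;
}

Because the synchronous path produces an MR indistinguishable from a cached one, freeing it can feed the bucket instead of destroying the mkey, which is what lets the cache recover after a burst empties it.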