diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index de5275a34da3..b7151f8f8fa5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -6389,6 +6389,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 	spin_lock_init(&dev->reset_flow_resource_lock);
 	xa_init(&dev->odp_mkeys);
 	xa_init(&dev->sig_mrs);
+	atomic_set(&dev->mkey_var, 0);
 
 	spin_lock_init(&dev->dm.lock);
 	dev->dm.dev = mdev;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 4b7d0dfabea2..2be773a24dda 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -616,8 +616,8 @@ struct mlx5_ib_mr {
 	struct ib_umem	       *umem;
 	struct mlx5_shared_mr_info	*smr_info;
 	struct list_head	list;
-	int			order;
-	bool			allocated_from_cache;
+	unsigned int		order;
+	struct mlx5_cache_ent  *cache_ent;
 	int			npages;
 	struct mlx5_ib_dev     *dev;
 	u32			out[MLX5_ST_SZ_DW(create_mkey_out)];
@@ -699,22 +699,34 @@ struct mlx5_cache_ent {
 	u32			access_mode;
 	u32			page;
-	u32			size;
-	u32			cur;
+	u8 disabled:1;
+	u8 fill_to_high_water:1;
+
+	/*
+	 * - available_mrs is the length of list head, ie the number of MRs
+	 *   available for immediate allocation.
+	 * - total_mrs is available_mrs plus all in use MRs that could be
+	 *   returned to the cache.
+	 * - limit is the low water mark for available_mrs, 2* limit is the
+	 *   upper water mark.
+	 * - pending is the number of MRs currently being created
+	 */
+	u32 total_mrs;
+	u32 available_mrs;
+	u32 limit;
+	u32 pending;
+
+	/* Statistics */
 	u32			miss;
-	u32			limit;
 
 	struct mlx5_ib_dev     *dev;
 	struct work_struct	work;
 	struct delayed_work	dwork;
-	int			pending;
-	struct completion	compl;
 };
 
 struct mlx5_mr_cache {
 	struct workqueue_struct *wq;
 	struct mlx5_cache_ent	ent[MAX_MR_CACHE_ENTRIES];
-	int			stopped;
 	struct dentry		*root;
 	unsigned long		last_add;
 };
 
@@ -986,14 +998,16 @@ struct mlx5_ib_dev {
 	 */
 	struct mutex			cap_mask_mutex;
 	u8				ib_active:1;
-	u8				fill_delay:1;
 	u8				is_rep:1;
 	u8				lag_active:1;
 	u8				wc_support:1;
+	u8				fill_delay;
 	struct umr_common		umrc;
 	/* sync used page count stats
 	 */
 	struct mlx5_ib_resources	devr;
+
+	atomic_t			mkey_var;
 	struct mlx5_mr_cache		cache;
 	struct timer_list		delay_timer;
 	/* Prevents soft lock on massive reg MRs */
@@ -1263,7 +1277,8 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+				       unsigned int entry);
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr);
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 6fa0a83c19de..a401931189b7 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -47,9 +47,46 @@ enum {
 
 #define MLX5_UMR_ALIGN 2048
 
+static void
+create_mkey_callback(int status, struct mlx5_async_work *context);
+
+static void
+assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
+		    u32 *in)
+{
+	u8 key = atomic_inc_return(&dev->mkey_var);
+	void *mkc;
+
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+	MLX5_SET(mkc, mkc, mkey_7_0, key);
+	mkey->key = key;
+}
+
+static int
+mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
+		    u32 *in, int inlen)
+{
+	assign_mkey_variant(dev, mkey, in);
+	return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen);
+}
+
+static int
+mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
+		       struct mlx5_core_mkey *mkey,
+		       struct mlx5_async_ctx *async_ctx,
+		       u32 *in, int inlen, u32 *out, int outlen,
+		       struct mlx5_async_work *context)
+{
+	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
+	assign_mkey_variant(dev, mkey, in);
+	return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
+				create_mkey_callback, context);
+}
+
 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
+static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
 
 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
 {
@@ -63,67 +100,73 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 }
 
-static int order2idx(struct mlx5_ib_dev *dev, int order)
-{
-	struct mlx5_mr_cache *cache = &dev->cache;
-
-	if (order < cache->ent[0].order)
-		return 0;
-	else
-		return order - cache->ent[0].order;
-}
-
 static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
 {
 	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
 		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
 }
 
-static void reg_mr_callback(int status, struct mlx5_async_work *context)
+static void create_mkey_callback(int status, struct mlx5_async_work *context)
 {
 	struct mlx5_ib_mr *mr =
 		container_of(context, struct mlx5_ib_mr, cb_work);
 	struct mlx5_ib_dev *dev = mr->dev;
-	struct mlx5_mr_cache *cache = &dev->cache;
-	int c = order2idx(dev, mr->order);
-	struct mlx5_cache_ent *ent = &cache->ent[c];
-	u8 key;
+	struct mlx5_cache_ent *ent = mr->cache_ent;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ent->lock, flags);
-	ent->pending--;
-	spin_unlock_irqrestore(&ent->lock, flags);
 	if (status) {
 		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
 		kfree(mr);
-		dev->fill_delay = 1;
+		spin_lock_irqsave(&ent->lock, flags);
+		ent->pending--;
+		WRITE_ONCE(dev->fill_delay, 1);
+		spin_unlock_irqrestore(&ent->lock, flags);
 		mod_timer(&dev->delay_timer, jiffies + HZ);
 		return;
 	}
 
 	mr->mmkey.type = MLX5_MKEY_MR;
-	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
-	key = dev->mdev->priv.mkey_key++;
-	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
-	mr->mmkey.key = mlx5_idx_to_mkey(
-		MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
+	mr->mmkey.key |= mlx5_idx_to_mkey(
+		MLX5_GET(create_mkey_out, mr->out, mkey_index));
 
-	cache->last_add = jiffies;
+	WRITE_ONCE(dev->cache.last_add, jiffies);
 
 	spin_lock_irqsave(&ent->lock, flags);
 	list_add_tail(&mr->list, &ent->head);
-	ent->cur++;
-	ent->size++;
+	ent->available_mrs++;
+	ent->total_mrs++;
+	/* If we are doing fill_to_high_water then keep going. */
+	queue_adjust_cache_locked(ent);
+	ent->pending--;
 	spin_unlock_irqrestore(&ent->lock, flags);
-
-	if (!completion_done(&ent->compl))
-		complete(&ent->compl);
 }
 
-static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent = &cache->ent[c];
-	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+	struct mlx5_ib_mr *mr;
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return NULL;
+	mr->order = ent->order;
+	mr->cache_ent = ent;
+	mr->dev = ent->dev;
+
+	MLX5_SET(mkc, mkc, free, 1);
+	MLX5_SET(mkc, mkc, umr_en, 1);
+	MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
+	MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
+
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+	MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
+	MLX5_SET(mkc, mkc, log_page_size, ent->page);
+	return mr;
+}
+
+/* Asynchronously schedule new MRs to be populated in the cache. */
+static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
+{
+	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	struct mlx5_ib_mr *mr;
 	void *mkc;
 	u32 *in;
@@ -136,42 +179,29 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	for (i = 0; i < num; i++) {
-		if (ent->pending >= MAX_PENDING_REG_MR) {
-			err = -EAGAIN;
-			break;
-		}
-
-		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+		mr = alloc_cache_mr(ent, mkc);
 		if (!mr) {
 			err = -ENOMEM;
 			break;
 		}
-		mr->order = ent->order;
-		mr->allocated_from_cache = true;
-		mr->dev = dev;
-
-		MLX5_SET(mkc, mkc, free, 1);
-		MLX5_SET(mkc, mkc, umr_en, 1);
-		MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
-		MLX5_SET(mkc, mkc, access_mode_4_2,
-			 (ent->access_mode >> 2) & 0x7);
-
-		MLX5_SET(mkc, mkc, qpn, 0xffffff);
-		MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
-		MLX5_SET(mkc, mkc, log_page_size, ent->page);
-
 		spin_lock_irq(&ent->lock);
+		if (ent->pending >= MAX_PENDING_REG_MR) {
+			err = -EAGAIN;
+			spin_unlock_irq(&ent->lock);
+			kfree(mr);
+			break;
+		}
 		ent->pending++;
 		spin_unlock_irq(&ent->lock);
-		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
-					       &dev->async_ctx, in, inlen,
-					       mr->out, sizeof(mr->out),
-					       reg_mr_callback, &mr->cb_work);
+		err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
+					     &ent->dev->async_ctx, in, inlen,
+					     mr->out, sizeof(mr->out),
+					     &mr->cb_work);
 		if (err) {
 			spin_lock_irq(&ent->lock);
 			ent->pending--;
 			spin_unlock_irq(&ent->lock);
-			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
+			mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
 			kfree(mr);
 			break;
 		}
@@ -181,32 +211,89 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 	return err;
 }
 
-static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
+/* Synchronously create a MR in the cache */
+static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent = &cache->ent[c];
-	struct mlx5_ib_mr *tmp_mr;
+	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	struct mlx5_ib_mr *mr;
-	LIST_HEAD(del_list);
-	int i;
+	void *mkc;
+	u32 *in;
+	int err;
 
-	for (i = 0; i < num; i++) {
-		spin_lock_irq(&ent->lock);
-		if (list_empty(&ent->head)) {
-			spin_unlock_irq(&ent->lock);
-			break;
-		}
-		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
-		list_move(&mr->list, &del_list);
-		ent->cur--;
-		ent->size--;
-		spin_unlock_irq(&ent->lock);
-		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+	in = kzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return ERR_PTR(-ENOMEM);
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+	mr = alloc_cache_mr(ent, mkc);
+	if (!mr) {
+		err = -ENOMEM;
+		goto free_in;
 	}
 
-	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
-		list_del(&mr->list);
-		kfree(mr);
+	err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
+	if (err)
+		goto free_mr;
+
+	mr->mmkey.type = MLX5_MKEY_MR;
+	WRITE_ONCE(ent->dev->cache.last_add, jiffies);
+	spin_lock_irq(&ent->lock);
+	ent->total_mrs++;
+	spin_unlock_irq(&ent->lock);
+	kfree(in);
+	return mr;
+free_mr:
+	kfree(mr);
+free_in:
+	kfree(in);
+	return ERR_PTR(err);
+}
+
+static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
+{
+	struct mlx5_ib_mr *mr;
+
+	lockdep_assert_held(&ent->lock);
+	if (list_empty(&ent->head))
+		return;
+	mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+	list_del(&mr->list);
+	ent->available_mrs--;
+	ent->total_mrs--;
+	spin_unlock_irq(&ent->lock);
+	mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
+	kfree(mr);
+	spin_lock_irq(&ent->lock);
+}
+
+static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
+				bool limit_fill)
+{
+	int err;
+
+	lockdep_assert_held(&ent->lock);
+
+	while (true) {
+		if (limit_fill)
+			target = ent->limit * 2;
+		if (target == ent->available_mrs + ent->pending)
+			return 0;
+		if (target > ent->available_mrs + ent->pending) {
+			u32 todo = target - (ent->available_mrs + ent->pending);
+
+			spin_unlock_irq(&ent->lock);
+			err = add_keys(ent, todo);
+			if (err == -EAGAIN)
+				usleep_range(3000, 5000);
+			spin_lock_irq(&ent->lock);
+			if (err) {
+				if (err != -EAGAIN)
+					return err;
+			} else
+				return 0;
+		} else {
+			remove_cache_mr_locked(ent);
+		}
 	}
 }
 
@@ -214,37 +301,38 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
 			  size_t count, loff_t *pos)
 {
 	struct mlx5_cache_ent *ent = filp->private_data;
-	struct mlx5_ib_dev *dev = ent->dev;
-	char lbuf[20] = {0};
-	u32 var;
+	u32 target;
 	int err;
-	int c;
 
-	count = min(count, sizeof(lbuf) - 1);
-	if (copy_from_user(lbuf, buf, count))
-		return -EFAULT;
+	err = kstrtou32_from_user(buf, count, 0, &target);
+	if (err)
+		return err;
 
-	c = order2idx(dev, ent->order);
-
-	if (sscanf(lbuf, "%u", &var) != 1)
-		return -EINVAL;
-
-	if (var < ent->limit)
-		return -EINVAL;
-
-	if (var > ent->size) {
-		do {
-			err = add_keys(dev, c, var - ent->size);
-			if (err && err != -EAGAIN)
-				return err;
-
-			usleep_range(3000, 5000);
-		} while (err);
-	} else if (var < ent->size) {
-		remove_keys(dev, c, ent->size - var);
+	/*
+	 * Target is the new value of total_mrs the user requests, however we
+	 * cannot free MRs that are in use. Compute the target value for
+	 * available_mrs.
+	 */
+	spin_lock_irq(&ent->lock);
+	if (target < ent->total_mrs - ent->available_mrs) {
+		err = -EINVAL;
+		goto err_unlock;
 	}
+	target = target - (ent->total_mrs - ent->available_mrs);
+	if (target < ent->limit || target > ent->limit*2) {
+		err = -EINVAL;
+		goto err_unlock;
+	}
+	err = resize_available_mrs(ent, target, false);
+	if (err)
+		goto err_unlock;
+	spin_unlock_irq(&ent->lock);
 
 	return count;
+
+err_unlock:
+	spin_unlock_irq(&ent->lock);
+	return err;
 }
 
 static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
@@ -254,7 +342,7 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
 	char lbuf[20];
 	int err;
 
-	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
+	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
 	if (err < 0)
 		return err;
 
@@ -272,32 +360,23 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
 			   size_t count, loff_t *pos)
 {
 	struct mlx5_cache_ent *ent = filp->private_data;
-	struct mlx5_ib_dev *dev = ent->dev;
-	char lbuf[20] = {0};
 	u32 var;
 	int err;
-	int c;
 
-	count = min(count, sizeof(lbuf) - 1);
-	if (copy_from_user(lbuf, buf, count))
-		return -EFAULT;
-
-	c = order2idx(dev, ent->order);
-
-	if (sscanf(lbuf, "%u", &var) != 1)
-		return -EINVAL;
-
-	if (var > ent->size)
-		return -EINVAL;
+	err = kstrtou32_from_user(buf, count, 0, &var);
+	if (err)
+		return err;
 
+	/*
+	 * Upon set we immediately fill the cache to high water mark implied by
+	 * the limit.
+	 */
+	spin_lock_irq(&ent->lock);
 	ent->limit = var;
-
-	if (ent->cur < ent->limit) {
-		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
-		if (err)
-			return err;
-	}
-
+	err = resize_available_mrs(ent, 0, true);
+	spin_unlock_irq(&ent->lock);
+	if (err)
+		return err;
 	return count;
 }
 
@@ -322,68 +401,119 @@ static const struct file_operations limit_fops = {
 	.read	= limit_read,
 };
 
-static int someone_adding(struct mlx5_mr_cache *cache)
+static bool someone_adding(struct mlx5_mr_cache *cache)
 {
-	int i;
+	unsigned int i;
 
 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
-		if (cache->ent[i].cur < cache->ent[i].limit)
-			return 1;
-	}
+		struct mlx5_cache_ent *ent = &cache->ent[i];
+		bool ret;
 
-	return 0;
+		spin_lock_irq(&ent->lock);
+		ret = ent->available_mrs < ent->limit;
+		spin_unlock_irq(&ent->lock);
+		if (ret)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Check if the bucket is outside the high/low water mark and schedule an async
+ * update. The cache refill has hysteresis, once the low water mark is hit it is
+ * refilled up to the high mark.
+ */
+static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
+{
+	lockdep_assert_held(&ent->lock);
+
+	if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
+		return;
+	if (ent->available_mrs < ent->limit) {
+		ent->fill_to_high_water = true;
+		queue_work(ent->dev->cache.wq, &ent->work);
+	} else if (ent->fill_to_high_water &&
+		   ent->available_mrs + ent->pending < 2 * ent->limit) {
+		/*
+		 * Once we start populating due to hitting a low water mark
+		 * continue until we pass the high water mark.
+		 */
+		queue_work(ent->dev->cache.wq, &ent->work);
+	} else if (ent->available_mrs == 2 * ent->limit) {
+		ent->fill_to_high_water = false;
+	} else if (ent->available_mrs > 2 * ent->limit) {
+		/* Queue deletion of excess entries */
+		ent->fill_to_high_water = false;
+		if (ent->pending)
+			queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
+					   msecs_to_jiffies(1000));
+		else
+			queue_work(ent->dev->cache.wq, &ent->work);
+	}
 }
 
 static void __cache_work_func(struct mlx5_cache_ent *ent)
 {
 	struct mlx5_ib_dev *dev = ent->dev;
 	struct mlx5_mr_cache *cache = &dev->cache;
-	int i = order2idx(dev, ent->order);
 	int err;
 
-	if (cache->stopped)
-		return;
+	spin_lock_irq(&ent->lock);
+	if (ent->disabled)
+		goto out;
 
-	ent = &dev->cache.ent[i];
-	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
-		err = add_keys(dev, i, 1);
-		if (ent->cur < 2 * ent->limit) {
-			if (err == -EAGAIN) {
-				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
-					    i + 2);
-				queue_delayed_work(cache->wq, &ent->dwork,
-						   msecs_to_jiffies(3));
-			} else if (err) {
-				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
-					     i + 2, err);
+	if (ent->fill_to_high_water &&
+	    ent->available_mrs + ent->pending < 2 * ent->limit &&
+	    !READ_ONCE(dev->fill_delay)) {
+		spin_unlock_irq(&ent->lock);
+		err = add_keys(ent, 1);
+		spin_lock_irq(&ent->lock);
+		if (ent->disabled)
+			goto out;
+		if (err) {
+			/*
+			 * EAGAIN only happens if pending is positive, so we
+			 * will be rescheduled from reg_mr_callback(). The only
+			 * failure path here is ENOMEM.
+			 */
+			if (err != -EAGAIN) {
+				mlx5_ib_warn(
+					dev,
+					"command failed order %d, err %d\n",
					ent->order, err);
 				queue_delayed_work(cache->wq, &ent->dwork,
 						   msecs_to_jiffies(1000));
-			} else {
-				queue_work(cache->wq, &ent->work);
 			}
 		}
-	} else if (ent->cur > 2 * ent->limit) {
+	} else if (ent->available_mrs > 2 * ent->limit) {
+		bool need_delay;
+
 		/*
-		 * The remove_keys() logic is performed as garbage collection
-		 * task. Such task is intended to be run when no other active
-		 * processes are running.
+		 * The remove_cache_mr() logic is performed as garbage
+		 * collection task. Such task is intended to be run when no
+		 * other active processes are running.
 		 *
 		 * The need_resched() will return TRUE if there are user tasks
 		 * to be activated in near future.
 		 *
-		 * In such case, we don't execute remove_keys() and postpone
-		 * the garbage collection work to try to run in next cycle,
-		 * in order to free CPU resources to other tasks.
+		 * In such case, we don't execute remove_cache_mr() and postpone
+		 * the garbage collection work to try to run in next cycle, in
+		 * order to free CPU resources to other tasks.
 		 */
-		if (!need_resched() && !someone_adding(cache) &&
-		    time_after(jiffies, cache->last_add + 300 * HZ)) {
-			remove_keys(dev, i, 1);
-			if (ent->cur > ent->limit)
-				queue_work(cache->wq, &ent->work);
-		} else {
+		spin_unlock_irq(&ent->lock);
+		need_delay = need_resched() || someone_adding(cache) ||
			     time_after(jiffies,
					READ_ONCE(cache->last_add) + 300 * HZ);
+		spin_lock_irq(&ent->lock);
+		if (ent->disabled)
+			goto out;
+		if (need_delay)
 			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
-		}
+		remove_cache_mr_locked(ent);
+		queue_adjust_cache_locked(ent);
 	}
+out:
+	spin_unlock_irq(&ent->lock);
 }
 
 static void delayed_cache_work_func(struct work_struct *work)
@@ -402,117 +532,95 @@ static void cache_work_func(struct work_struct *work)
 	__cache_work_func(ent);
 }
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
+/* Allocate a special entry from the cache */
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+				       unsigned int entry)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
 	struct mlx5_ib_mr *mr;
-	int err;
 
-	if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
-		mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
+	if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
		    entry >= ARRAY_SIZE(cache->ent)))
 		return ERR_PTR(-EINVAL);
-	}
 
 	ent = &cache->ent[entry];
-	while (1) {
-		spin_lock_irq(&ent->lock);
-		if (list_empty(&ent->head)) {
-			spin_unlock_irq(&ent->lock);
-
-			err = add_keys(dev, entry, 1);
-			if (err && err != -EAGAIN)
-				return ERR_PTR(err);
-
-			wait_for_completion(&ent->compl);
-		} else {
-			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
-					      list);
-			list_del(&mr->list);
-			ent->cur--;
-			spin_unlock_irq(&ent->lock);
-			if (ent->cur < ent->limit)
-				queue_work(cache->wq, &ent->work);
+	spin_lock_irq(&ent->lock);
+	if (list_empty(&ent->head)) {
+		spin_unlock_irq(&ent->lock);
+		mr = create_cache_mr(ent);
+		if (IS_ERR(mr))
 			return mr;
-		}
+	} else {
+		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+		list_del(&mr->list);
+		ent->available_mrs--;
+		queue_adjust_cache_locked(ent);
+		spin_unlock_irq(&ent->lock);
 	}
+	return mr;
 }
 
-static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
+/* Return a MR already available in the cache */
+static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
+	struct mlx5_ib_dev *dev = req_ent->dev;
 	struct mlx5_ib_mr *mr = NULL;
-	struct mlx5_cache_ent *ent;
-	int last_umr_cache_entry;
-	int c;
-	int i;
+	struct mlx5_cache_ent *ent = req_ent;
 
-	c = order2idx(dev, order);
-	last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
-	if (c < 0 || c > last_umr_cache_entry) {
-		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
-		return NULL;
-	}
-
-	for (i = c; i <= last_umr_cache_entry; i++) {
-		ent = &cache->ent[i];
-
-		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
+	/* Try larger MR pools from the cache to satisfy the allocation */
+	for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) {
+		mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
+			    ent - dev->cache.ent);
 
 		spin_lock_irq(&ent->lock);
 		if (!list_empty(&ent->head)) {
 			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 					      list);
 			list_del(&mr->list);
-			ent->cur--;
+			ent->available_mrs--;
+			queue_adjust_cache_locked(ent);
 			spin_unlock_irq(&ent->lock);
 			break;
 		}
+		queue_adjust_cache_locked(ent);
 		spin_unlock_irq(&ent->lock);
-
-		queue_work(cache->wq, &ent->work);
 	}
 
 	if (!mr)
-		cache->ent[c].miss++;
+		req_ent->miss++;
 
 	return mr;
 }
 
+static void detach_mr_from_cache(struct mlx5_ib_mr *mr)
+{
+	struct mlx5_cache_ent *ent = mr->cache_ent;
+
+	mr->cache_ent = NULL;
+	spin_lock_irq(&ent->lock);
+	ent->total_mrs--;
+	spin_unlock_irq(&ent->lock);
+}
+
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent;
-	int shrink = 0;
-	int c;
+	struct mlx5_cache_ent *ent = mr->cache_ent;
 
-	if (!mr->allocated_from_cache)
+	if (!ent)
 		return;
 
-	c = order2idx(dev, mr->order);
-	WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);
-
 	if (mlx5_mr_cache_invalidate(mr)) {
-		mr->allocated_from_cache = false;
+		detach_mr_from_cache(mr);
 		destroy_mkey(dev, mr);
-		ent = &cache->ent[c];
-		if (ent->cur < ent->limit)
-			queue_work(cache->wq, &ent->work);
 		return;
 	}
 
-	ent = &cache->ent[c];
 	spin_lock_irq(&ent->lock);
 	list_add_tail(&mr->list, &ent->head);
-	ent->cur++;
-	if (ent->cur > 2 * ent->limit)
-		shrink = 1;
+	ent->available_mrs++;
+	queue_adjust_cache_locked(ent);
 	spin_unlock_irq(&ent->lock);
-
-	if (shrink)
-		queue_work(cache->wq, &ent->work);
 }
 
 static void clean_keys(struct mlx5_ib_dev *dev, int c)
@@ -532,8 +640,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 		}
 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 		list_move(&mr->list, &del_list);
-		ent->cur--;
-		ent->size--;
+		ent->available_mrs--;
+		ent->total_mrs--;
 		spin_unlock_irq(&ent->lock);
 		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 	}
@@ -571,7 +679,7 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
 		dir = debugfs_create_dir(ent->name, cache->root);
 		debugfs_create_file("size", 0600, dir, ent, &size_fops);
 		debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
-		debugfs_create_u32("cur", 0400, dir, &ent->cur);
+		debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
 		debugfs_create_u32("miss", 0600, dir, &ent->miss);
 	}
 }
@@ -580,7 +688,7 @@ static void delay_time_func(struct timer_list *t)
 {
 	struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
 
-	dev->fill_delay = 0;
+	WRITE_ONCE(dev->fill_delay, 0);
 }
 
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
@@ -606,7 +714,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		ent->dev = dev;
 		ent->limit = 0;
 
-		init_completion(&ent->compl);
 		INIT_WORK(&ent->work, cache_work_func);
 		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 
@@ -628,7 +735,9 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 			ent->limit = dev->mdev->profile->mr_cache[i].limit;
 		else
 			ent->limit = 0;
-		queue_work(cache->wq, &ent->work);
+		spin_lock_irq(&ent->lock);
+		queue_adjust_cache_locked(ent);
+		spin_unlock_irq(&ent->lock);
 	}
 
 	mlx5_mr_cache_debugfs_init(dev);
@@ -638,13 +747,20 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 {
-	int i;
+	unsigned int i;
 
 	if (!dev->cache.wq)
 		return 0;
 
-	dev->cache.stopped = 1;
-	flush_workqueue(dev->cache.wq);
+	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+		struct mlx5_cache_ent *ent = &dev->cache.ent[i];
+
+		spin_lock_irq(&ent->lock);
+		ent->disabled = true;
+		spin_unlock_irq(&ent->lock);
+		cancel_work_sync(&ent->work);
+		cancel_delayed_work_sync(&ent->dwork);
+	}
 
 	mlx5_mr_cache_debugfs_cleanup(dev);
 	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
@@ -685,7 +801,6 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
-	struct mlx5_core_dev *mdev = dev->mdev;
 	struct mlx5_ib_mr *mr;
 	void *mkc;
 	u32 *in;
@@ -707,7 +822,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 	MLX5_SET(mkc, mkc, length64, 1);
 	set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);
 
-	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
+	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
 	if (err)
 		goto err_in;
 
@@ -840,31 +955,37 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 	return err;
 }
 
-static struct mlx5_ib_mr *alloc_mr_from_cache(
-				  struct ib_pd *pd, struct ib_umem *umem,
-				  u64 virt_addr, u64 len, int npages,
-				  int page_shift, int order, int access_flags)
+static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
+						      unsigned int order)
+{
+	struct mlx5_mr_cache *cache = &dev->cache;
+
+	if (order < cache->ent[0].order)
+		return &cache->ent[0];
+	order = order - cache->ent[0].order;
+	if (order > MR_CACHE_LAST_STD_ENTRY)
+		return NULL;
+	return &cache->ent[order];
+}
+
+static struct mlx5_ib_mr *
+alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
+		    u64 len, int npages, int page_shift, unsigned int order,
+		    int access_flags)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
 	struct mlx5_ib_mr *mr;
-	int err = 0;
-	int i;
 
-	for (i = 0; i < 1; i++) {
-		mr = alloc_cached_mr(dev, order);
-		if (mr)
-			break;
-
-		err = add_keys(dev, order2idx(dev, order), 1);
-		if (err && err != -EAGAIN) {
-			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
-			break;
-		}
+	if (!ent)
+		return ERR_PTR(-E2BIG);
+	mr = get_cache_mr(ent);
+	if (!mr) {
+		mr = create_cache_mr(ent);
+		if (IS_ERR(mr))
+			return mr;
 	}
 
-	if (!mr)
-		return ERR_PTR(-EAGAIN);
-
 	mr->ibmr.pd = pd;
 	mr->umem = umem;
 	mr->access_flags = access_flags;
@@ -1097,7 +1218,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
 			 get_octo_len(virt_addr, length, page_shift));
 	}
 
-	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
+	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
 	if (err) {
 		mlx5_ib_warn(dev, "create mkey failed\n");
 		goto err_2;
@@ -1137,7 +1258,6 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
-	struct mlx5_core_dev *mdev = dev->mdev;
 	struct mlx5_ib_mr *mr;
 	void *mkc;
 	u32 *in;
@@ -1160,7 +1280,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
 	MLX5_SET64(mkc, mkc, len, length);
 	set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
 
-	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
+	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
 	if (err)
 		goto err_in;
 
@@ -1439,10 +1559,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		/*
 		 * UMR can't be used - MKey needs to be replaced.
 		 */
-		if (mr->allocated_from_cache)
-			err = mlx5_mr_cache_invalidate(mr);
-		else
-			err = destroy_mkey(dev, mr);
+		if (mr->cache_ent)
+			detach_mr_from_cache(mr);
+		err = destroy_mkey(dev, mr);
 		if (err)
 			goto err;
 
@@ -1454,8 +1573,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 			mr = to_mmr(ib_mr);
 			goto err;
 		}
-
-		mr->allocated_from_cache = false;
 	} else {
 		/*
 		 * Send a UMR WQE
@@ -1542,8 +1659,6 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
 
 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	int allocated_from_cache = mr->allocated_from_cache;
-
 	if (mr->sig) {
 		if (mlx5_core_destroy_psv(dev->mdev,
 					  mr->sig->psv_memory.psv_idx))
@@ -1558,7 +1673,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		mr->sig = NULL;
 	}
 
-	if (!allocated_from_cache) {
+	if (!mr->cache_ent) {
 		destroy_mkey(dev, mr);
 		mlx5_free_priv_descs(mr);
 	}
@@ -1575,7 +1690,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	else
 		clean_mr(dev, mr);
 
-	if (mr->allocated_from_cache)
+	if (mr->cache_ent)
 		mlx5_mr_cache_free(dev, mr);
 	else
 		kfree(mr);
@@ -1638,7 +1753,7 @@ static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
 
 	mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
 
-	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
+	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
 	if (err)
 		goto err_free_descs;
 
@@ -1905,7 +2020,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
 	MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
 
-	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
+	err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
 	if (err)
 		goto free;
 
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index bf50cd91f472..3de7606d4a1a 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -197,7 +197,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
 	odp->private = NULL;
 	mutex_unlock(&odp->umem_mutex);
 
-	if (!mr->allocated_from_cache) {
+	if (!mr->cache_ent) {
 		mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey);
 		WARN_ON(mr->descs);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
index 3fc575d1c3ec..dcea87ec5977 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
@@ -42,7 +42,7 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
 	MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size);
 	MLX5_SET(encryption_key_obj, obj, key_type,
-		 MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK);
+		 MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS);
 	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
 		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
 	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f554cfddcf4e..6b38ec72215a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1282,7 +1282,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
 	mutex_init(&priv->alloc_mutex);
 	mutex_init(&priv->pgdir_mutex);
 	INIT_LIST_HEAD(&priv->pgdir_list);
-	spin_lock_init(&priv->mkey_lock);
 
 	priv->dbg_root = debugfs_create_dir(dev_name(dev->device),
 					    mlx5_debugfs_root);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 42cc3c7ac5b6..fd3e6d217c3b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -36,12 +36,9 @@
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
 
-int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
-			     struct mlx5_core_mkey *mkey,
-			     struct mlx5_async_ctx *async_ctx, u32 *in,
-			     int inlen, u32 *out, int outlen,
-			     mlx5_async_cbk_t callback,
-			     struct mlx5_async_work *context)
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
+			  struct mlx5_core_mkey *mkey,
+			  u32 *in, int inlen)
 {
 	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
 	u32 mkey_index;
@@ -49,41 +46,23 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
 	int err;
 	u8 key;
 
-	spin_lock_irq(&dev->priv.mkey_lock);
-	key = dev->priv.mkey_key++;
-	spin_unlock_irq(&dev->priv.mkey_lock);
-	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
-	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
-	MLX5_SET(mkc, mkc, mkey_7_0, key);
-
-	if (callback)
-		return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
-					callback, context);
-
 	err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
 	if (err)
 		return err;
 
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
 	mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
 	mkey->size = MLX5_GET64(mkc, mkc, len);
-	mkey->key = mlx5_idx_to_mkey(mkey_index) | key;
+	mkey->key |= mlx5_idx_to_mkey(mkey_index);
 	mkey->pd = MLX5_GET(mkc, mkc, pd);
 
 	mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
 		      mkey_index, key, mkey->key);
 	return 0;
 }
-EXPORT_SYMBOL(mlx5_core_create_mkey_cb);
-
-int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
-			  struct mlx5_core_mkey *mkey,
-			  u32 *in, int inlen)
-{
-	return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen,
-					NULL, 0, NULL, NULL);
-}
 EXPORT_SYMBOL(mlx5_core_create_mkey);
 
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f2b4225ed650..1de78f001d26 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -575,10 +575,6 @@ struct mlx5_priv {
 	/* end: alloc staff */
 	struct dentry	       *dbg_root;
 
-	/* protect mkey key part */
-	spinlock_t		mkey_lock;
-	u8			mkey_key;
-
 	struct list_head	dev_list;
 	struct list_head	ctx_list;
 	spinlock_t		ctx_lock;
@@ -947,12 +943,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 					     gfp_t flags, int npages);
 void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 				 struct mlx5_cmd_mailbox *head);
-int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
-			     struct mlx5_core_mkey *mkey,
-			     struct mlx5_async_ctx *async_ctx, u32 *in,
-			     int inlen, u32 *out, int outlen,
-			     mlx5_async_cbk_t callback,
-			     struct mlx5_async_work *context);
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 			  struct mlx5_core_mkey *mkey,
 			  u32 *in, int inlen);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f3a7189f9d6d..208bf1127be7 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -414,7 +414,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         reserved_at_16[0x1];
 	u8         table_miss_action_domain[0x1];
 	u8         termination_table[0x1];
-	u8         reserved_at_19[0x7];
+	u8         reformat_and_fwd_to_table[0x1];
+	u8         reserved_at_1a[0x6];
 	u8         reserved_at_20[0x2];
 	u8         log_max_ft_size[0x6];
 	u8         log_max_modify_header_context[0x8];
@@ -741,7 +742,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits {
 	u8      flow_source[0x1];
 	u8      reserved_at_18[0x2];
 	u8      multi_fdb_encap[0x1];
-	u8      reserved_at_1b[0x1];
+	u8      egress_acl_forward_to_vport[0x1];
 	u8      fdb_multi_path_to_table[0x1];
 	u8      reserved_at_1d[0x3];
 
@@ -8430,7 +8431,8 @@ struct mlx5_ifc_ptys_reg_bits {
 	u8         proto_mask[0x3];
 
 	u8         an_status[0x4];
-	u8         reserved_at_24[0x1c];
+	u8         reserved_at_24[0xc];
+	u8         data_rate_oper[0x10];
 
 	u8         ext_eth_proto_capability[0x20];
 
@@ -10496,7 +10498,8 @@ enum {
 };
 
 enum {
-	MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK = 0x1,
+	MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS = 0x1,
+	MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC = 0x2,
 };
 
 struct mlx5_ifc_tls_static_params_bits {