Merge branch 'mlx5_mr_cache' into rdma.git for-next

Leon Romanovsky says:

====================
This series fixes various corner cases in the mlx5_ib MR cache
implementation; see the specific commit messages for more information.
====================

Based on the mlx5-next branch at
 git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
due to dependencies.

* branch 'mlx5_mr_cache':
  RDMA/mlx5: Allow MRs to be created in the cache synchronously
  RDMA/mlx5: Revise how the hysteresis scheme works for cache filling
  RDMA/mlx5: Fix locking in MR cache work queue
  RDMA/mlx5: Lock access to ent->available_mrs/limit when doing queue_work
  RDMA/mlx5: Fix MR cache size and limit debugfs
  RDMA/mlx5: Always remove MRs from the cache before destroying them
  RDMA/mlx5: Simplify how the MR cache bucket is located
  RDMA/mlx5: Rename the tracking variables for the MR cache
  RDMA/mlx5: Replace spinlock protected write with atomic var
  {IB,net}/mlx5: Move asynchronous mkey creation to mlx5_ib
  {IB,net}/mlx5: Assign mkey variant in mlx5_ib only
  {IB,net}/mlx5: Setup mkey variant before mr create command invocation
commit d613bd64c6
Jason Gunthorpe, 2020-03-13 11:11:07 -03:00
9 changed files with 429 additions and 327 deletions


@@ -6389,6 +6389,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
spin_lock_init(&dev->reset_flow_resource_lock);
xa_init(&dev->odp_mkeys);
xa_init(&dev->sig_mrs);
atomic_set(&dev->mkey_var, 0);
spin_lock_init(&dev->dm.lock);
dev->dm.dev = mdev;

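For reference, the mkey_var counter initialized above supplies the low "variant" byte of every mkey created by mlx5_ib, while the firmware-assigned index fills the upper bits via mlx5_idx_to_mkey(). A minimal sketch of that composition (illustrative only, not part of the diff; assumes the usual index-shifted-by-8 layout):

/*
 * Sketch only: how assign_mkey_variant() and create_mkey_callback()
 * combine the pieces. mkey_7_0 carries the variant, the CREATE_MKEY
 * command returns the index, and the final key is (index << 8) | variant.
 */
static u32 sketch_compose_mkey(u32 mkey_index, u8 variant)
{
	return (mkey_index << 8) | variant;
}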

@@ -616,8 +616,8 @@ struct mlx5_ib_mr {
struct ib_umem *umem;
struct mlx5_shared_mr_info *smr_info;
struct list_head list;
int order;
bool allocated_from_cache;
unsigned int order;
struct mlx5_cache_ent *cache_ent;
int npages;
struct mlx5_ib_dev *dev;
u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
@@ -699,22 +699,34 @@ struct mlx5_cache_ent {
u32 access_mode;
u32 page;
u32 size;
u32 cur;
u8 disabled:1;
u8 fill_to_high_water:1;
/*
* - available_mrs is the length of list head, ie the number of MRs
* available for immediate allocation.
* - total_mrs is available_mrs plus all in use MRs that could be
* returned to the cache.
* - limit is the low water mark for available_mrs, 2* limit is the
* upper water mark.
* - pending is the number of MRs currently being created
*/
u32 total_mrs;
u32 available_mrs;
u32 limit;
u32 pending;
/* Statistics */
u32 miss;
u32 limit;
struct mlx5_ib_dev *dev;
struct work_struct work;
struct delayed_work dwork;
int pending;
struct completion compl;
};
struct mlx5_mr_cache {
struct workqueue_struct *wq;
struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES];
int stopped;
struct dentry *root;
unsigned long last_add;
};
@@ -986,14 +998,16 @@ struct mlx5_ib_dev {
*/
struct mutex cap_mask_mutex;
u8 ib_active:1;
u8 fill_delay:1;
u8 is_rep:1;
u8 lag_active:1;
u8 wc_support:1;
u8 fill_delay;
struct umr_common umrc;
/* sync used page count stats
*/
struct mlx5_ib_resources devr;
atomic_t mkey_var;
struct mlx5_mr_cache cache;
struct timer_list delay_timer;
/* Prevents soft lock on massive reg MRs */
@@ -1263,7 +1277,8 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
unsigned int entry);
void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr);

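The struct comment above defines limit as the low water mark and 2 * limit as the high water mark for available_mrs. A small sketch of those two thresholds (illustrative only, not part of the diff):

/*
 * Sketch of the thresholds the cache workqueue acts on: refill starts
 * below the low water mark and, once started, continues up to the high
 * water mark; shrinking starts only above the high water mark.
 */
static inline bool ent_below_low_water(u32 available_mrs, u32 limit)
{
	return available_mrs < limit;
}

static inline bool ent_above_high_water(u32 available_mrs, u32 limit)
{
	return available_mrs > 2 * limit;
}

For example, a bucket with limit = 500 begins refilling once fewer than 500 MRs are free and keeps creating until 1000 are available; beyond 1000 the excess is freed.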

@@ -47,9 +47,46 @@ enum {
#define MLX5_UMR_ALIGN 2048
static void
create_mkey_callback(int status, struct mlx5_async_work *context);
static void
assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
u32 *in)
{
u8 key = atomic_inc_return(&dev->mkey_var);
void *mkc;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, mkey_7_0, key);
mkey->key = key;
}
static int
mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
u32 *in, int inlen)
{
assign_mkey_variant(dev, mkey, in);
return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen);
}
static int
mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
struct mlx5_core_mkey *mkey,
struct mlx5_async_ctx *async_ctx,
u32 *in, int inlen, u32 *out, int outlen,
struct mlx5_async_work *context)
{
MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
assign_mkey_variant(dev, mkey, in);
return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
create_mkey_callback, context);
}
static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
@@ -63,67 +100,73 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
static int order2idx(struct mlx5_ib_dev *dev, int order)
{
struct mlx5_mr_cache *cache = &dev->cache;
if (order < cache->ent[0].order)
return 0;
else
return order - cache->ent[0].order;
}
static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
{
return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}
static void reg_mr_callback(int status, struct mlx5_async_work *context)
static void create_mkey_callback(int status, struct mlx5_async_work *context)
{
struct mlx5_ib_mr *mr =
container_of(context, struct mlx5_ib_mr, cb_work);
struct mlx5_ib_dev *dev = mr->dev;
struct mlx5_mr_cache *cache = &dev->cache;
int c = order2idx(dev, mr->order);
struct mlx5_cache_ent *ent = &cache->ent[c];
u8 key;
struct mlx5_cache_ent *ent = mr->cache_ent;
unsigned long flags;
spin_lock_irqsave(&ent->lock, flags);
ent->pending--;
spin_unlock_irqrestore(&ent->lock, flags);
if (status) {
mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
kfree(mr);
dev->fill_delay = 1;
spin_lock_irqsave(&ent->lock, flags);
ent->pending--;
WRITE_ONCE(dev->fill_delay, 1);
spin_unlock_irqrestore(&ent->lock, flags);
mod_timer(&dev->delay_timer, jiffies + HZ);
return;
}
mr->mmkey.type = MLX5_MKEY_MR;
spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
key = dev->mdev->priv.mkey_key++;
spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
mr->mmkey.key |= mlx5_idx_to_mkey(
MLX5_GET(create_mkey_out, mr->out, mkey_index));
cache->last_add = jiffies;
WRITE_ONCE(dev->cache.last_add, jiffies);
spin_lock_irqsave(&ent->lock, flags);
list_add_tail(&mr->list, &ent->head);
ent->cur++;
ent->size++;
ent->available_mrs++;
ent->total_mrs++;
/* If we are doing fill_to_high_water then keep going. */
queue_adjust_cache_locked(ent);
ent->pending--;
spin_unlock_irqrestore(&ent->lock, flags);
if (!completion_done(&ent->compl))
complete(&ent->compl);
}
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return NULL;
mr->order = ent->order;
mr->cache_ent = ent;
mr->dev = ent->dev;
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
MLX5_SET(mkc, mkc, log_page_size, ent->page);
return mr;
}
/* Asynchronously schedule new MRs to be populated in the cache. */
static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
{
size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
void *mkc;
u32 *in;
@@ -136,42 +179,29 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
for (i = 0; i < num; i++) {
if (ent->pending >= MAX_PENDING_REG_MR) {
err = -EAGAIN;
break;
}
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
mr = alloc_cache_mr(ent, mkc);
if (!mr) {
err = -ENOMEM;
break;
}
mr->order = ent->order;
mr->allocated_from_cache = true;
mr->dev = dev;
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2,
(ent->access_mode >> 2) & 0x7);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
MLX5_SET(mkc, mkc, log_page_size, ent->page);
spin_lock_irq(&ent->lock);
if (ent->pending >= MAX_PENDING_REG_MR) {
err = -EAGAIN;
spin_unlock_irq(&ent->lock);
kfree(mr);
break;
}
ent->pending++;
spin_unlock_irq(&ent->lock);
err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
&dev->async_ctx, in, inlen,
mr->out, sizeof(mr->out),
reg_mr_callback, &mr->cb_work);
err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
&ent->dev->async_ctx, in, inlen,
mr->out, sizeof(mr->out),
&mr->cb_work);
if (err) {
spin_lock_irq(&ent->lock);
ent->pending--;
spin_unlock_irq(&ent->lock);
mlx5_ib_warn(dev, "create mkey failed %d\n", err);
mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
kfree(mr);
break;
}
@@ -181,32 +211,89 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
return err;
}
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
/* Synchronously create a MR in the cache */
static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
struct mlx5_ib_mr *tmp_mr;
size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
LIST_HEAD(del_list);
int i;
void *mkc;
u32 *in;
int err;
for (i = 0; i < num; i++) {
spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);
break;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
list_move(&mr->list, &del_list);
ent->cur--;
ent->size--;
spin_unlock_irq(&ent->lock);
mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return ERR_PTR(-ENOMEM);
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
mr = alloc_cache_mr(ent, mkc);
if (!mr) {
err = -ENOMEM;
goto free_in;
}
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
list_del(&mr->list);
kfree(mr);
err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
if (err)
goto free_mr;
mr->mmkey.type = MLX5_MKEY_MR;
WRITE_ONCE(ent->dev->cache.last_add, jiffies);
spin_lock_irq(&ent->lock);
ent->total_mrs++;
spin_unlock_irq(&ent->lock);
kfree(in);
return mr;
free_mr:
kfree(mr);
free_in:
kfree(in);
return ERR_PTR(err);
}
static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
{
struct mlx5_ib_mr *mr;
lockdep_assert_held(&ent->lock);
if (list_empty(&ent->head))
return;
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
list_del(&mr->list);
ent->available_mrs--;
ent->total_mrs--;
spin_unlock_irq(&ent->lock);
mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
kfree(mr);
spin_lock_irq(&ent->lock);
}
static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
bool limit_fill)
{
int err;
lockdep_assert_held(&ent->lock);
while (true) {
if (limit_fill)
target = ent->limit * 2;
if (target == ent->available_mrs + ent->pending)
return 0;
if (target > ent->available_mrs + ent->pending) {
u32 todo = target - (ent->available_mrs + ent->pending);
spin_unlock_irq(&ent->lock);
err = add_keys(ent, todo);
if (err == -EAGAIN)
usleep_range(3000, 5000);
spin_lock_irq(&ent->lock);
if (err) {
if (err != -EAGAIN)
return err;
} else
return 0;
} else {
remove_cache_mr_locked(ent);
}
}
}
@@ -214,37 +301,38 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct mlx5_cache_ent *ent = filp->private_data;
struct mlx5_ib_dev *dev = ent->dev;
char lbuf[20] = {0};
u32 var;
u32 target;
int err;
int c;
count = min(count, sizeof(lbuf) - 1);
if (copy_from_user(lbuf, buf, count))
return -EFAULT;
err = kstrtou32_from_user(buf, count, 0, &target);
if (err)
return err;
c = order2idx(dev, ent->order);
if (sscanf(lbuf, "%u", &var) != 1)
return -EINVAL;
if (var < ent->limit)
return -EINVAL;
if (var > ent->size) {
do {
err = add_keys(dev, c, var - ent->size);
if (err && err != -EAGAIN)
return err;
usleep_range(3000, 5000);
} while (err);
} else if (var < ent->size) {
remove_keys(dev, c, ent->size - var);
/*
* Target is the new value of total_mrs the user requests, however we
* cannot free MRs that are in use. Compute the target value for
* available_mrs.
*/
spin_lock_irq(&ent->lock);
if (target < ent->total_mrs - ent->available_mrs) {
err = -EINVAL;
goto err_unlock;
}
target = target - (ent->total_mrs - ent->available_mrs);
if (target < ent->limit || target > ent->limit*2) {
err = -EINVAL;
goto err_unlock;
}
err = resize_available_mrs(ent, target, false);
if (err)
goto err_unlock;
spin_unlock_irq(&ent->lock);
return count;
err_unlock:
spin_unlock_irq(&ent->lock);
return err;
}
static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
@@ -254,7 +342,7 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
char lbuf[20];
int err;
err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
if (err < 0)
return err;
@@ -272,32 +360,23 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct mlx5_cache_ent *ent = filp->private_data;
struct mlx5_ib_dev *dev = ent->dev;
char lbuf[20] = {0};
u32 var;
int err;
int c;
count = min(count, sizeof(lbuf) - 1);
if (copy_from_user(lbuf, buf, count))
return -EFAULT;
c = order2idx(dev, ent->order);
if (sscanf(lbuf, "%u", &var) != 1)
return -EINVAL;
if (var > ent->size)
return -EINVAL;
err = kstrtou32_from_user(buf, count, 0, &var);
if (err)
return err;
/*
* Upon set we immediately fill the cache to high water mark implied by
* the limit.
*/
spin_lock_irq(&ent->lock);
ent->limit = var;
if (ent->cur < ent->limit) {
err = add_keys(dev, c, 2 * ent->limit - ent->cur);
if (err)
return err;
}
err = resize_available_mrs(ent, 0, true);
spin_unlock_irq(&ent->lock);
if (err)
return err;
return count;
}
@@ -322,68 +401,119 @@ static const struct file_operations limit_fops = {
.read = limit_read,
};
static int someone_adding(struct mlx5_mr_cache *cache)
static bool someone_adding(struct mlx5_mr_cache *cache)
{
int i;
unsigned int i;
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
if (cache->ent[i].cur < cache->ent[i].limit)
return 1;
}
struct mlx5_cache_ent *ent = &cache->ent[i];
bool ret;
return 0;
spin_lock_irq(&ent->lock);
ret = ent->available_mrs < ent->limit;
spin_unlock_irq(&ent->lock);
if (ret)
return true;
}
return false;
}
/*
* Check if the bucket is outside the high/low water mark and schedule an async
* update. The cache refill has hysteresis, once the low water mark is hit it is
* refilled up to the high mark.
*/
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
{
lockdep_assert_held(&ent->lock);
if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
return;
if (ent->available_mrs < ent->limit) {
ent->fill_to_high_water = true;
queue_work(ent->dev->cache.wq, &ent->work);
} else if (ent->fill_to_high_water &&
ent->available_mrs + ent->pending < 2 * ent->limit) {
/*
* Once we start populating due to hitting a low water mark
* continue until we pass the high water mark.
*/
queue_work(ent->dev->cache.wq, &ent->work);
} else if (ent->available_mrs == 2 * ent->limit) {
ent->fill_to_high_water = false;
} else if (ent->available_mrs > 2 * ent->limit) {
/* Queue deletion of excess entries */
ent->fill_to_high_water = false;
if (ent->pending)
queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
msecs_to_jiffies(1000));
else
queue_work(ent->dev->cache.wq, &ent->work);
}
}
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
struct mlx5_ib_dev *dev = ent->dev;
struct mlx5_mr_cache *cache = &dev->cache;
int i = order2idx(dev, ent->order);
int err;
if (cache->stopped)
return;
spin_lock_irq(&ent->lock);
if (ent->disabled)
goto out;
ent = &dev->cache.ent[i];
if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
err = add_keys(dev, i, 1);
if (ent->cur < 2 * ent->limit) {
if (err == -EAGAIN) {
mlx5_ib_dbg(dev, "returned eagain, order %d\n",
i + 2);
queue_delayed_work(cache->wq, &ent->dwork,
msecs_to_jiffies(3));
} else if (err) {
mlx5_ib_warn(dev, "command failed order %d, err %d\n",
i + 2, err);
if (ent->fill_to_high_water &&
ent->available_mrs + ent->pending < 2 * ent->limit &&
!READ_ONCE(dev->fill_delay)) {
spin_unlock_irq(&ent->lock);
err = add_keys(ent, 1);
spin_lock_irq(&ent->lock);
if (ent->disabled)
goto out;
if (err) {
/*
* EAGAIN only happens if pending is positive, so we
* will be rescheduled from reg_mr_callback(). The only
* failure path here is ENOMEM.
*/
if (err != -EAGAIN) {
mlx5_ib_warn(
dev,
"command failed order %d, err %d\n",
ent->order, err);
queue_delayed_work(cache->wq, &ent->dwork,
msecs_to_jiffies(1000));
} else {
queue_work(cache->wq, &ent->work);
}
}
} else if (ent->cur > 2 * ent->limit) {
} else if (ent->available_mrs > 2 * ent->limit) {
bool need_delay;
/*
* The remove_keys() logic is performed as garbage collection
* task. Such task is intended to be run when no other active
* processes are running.
* The remove_cache_mr() logic is performed as garbage
* collection task. Such task is intended to be run when no
* other active processes are running.
*
* The need_resched() will return TRUE if there are user tasks
* to be activated in near future.
*
* In such case, we don't execute remove_keys() and postpone
* the garbage collection work to try to run in next cycle,
* in order to free CPU resources to other tasks.
* In such case, we don't execute remove_cache_mr() and postpone
* the garbage collection work to try to run in next cycle, in
* order to free CPU resources to other tasks.
*/
if (!need_resched() && !someone_adding(cache) &&
time_after(jiffies, cache->last_add + 300 * HZ)) {
remove_keys(dev, i, 1);
if (ent->cur > ent->limit)
queue_work(cache->wq, &ent->work);
} else {
spin_unlock_irq(&ent->lock);
need_delay = need_resched() || someone_adding(cache) ||
time_after(jiffies,
READ_ONCE(cache->last_add) + 300 * HZ);
spin_lock_irq(&ent->lock);
if (ent->disabled)
goto out;
if (need_delay)
queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
}
remove_cache_mr_locked(ent);
queue_adjust_cache_locked(ent);
}
out:
spin_unlock_irq(&ent->lock);
}
static void delayed_cache_work_func(struct work_struct *work)
@@ -402,117 +532,95 @@ static void cache_work_func(struct work_struct *work)
__cache_work_func(ent);
}
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
/* Allocate a special entry from the cache */
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
unsigned int entry)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
int err;
if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
entry >= ARRAY_SIZE(cache->ent)))
return ERR_PTR(-EINVAL);
}
ent = &cache->ent[entry];
while (1) {
spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);
err = add_keys(dev, entry, 1);
if (err && err != -EAGAIN)
return ERR_PTR(err);
wait_for_completion(&ent->compl);
} else {
mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
list);
list_del(&mr->list);
ent->cur--;
spin_unlock_irq(&ent->lock);
if (ent->cur < ent->limit)
queue_work(cache->wq, &ent->work);
spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);
mr = create_cache_mr(ent);
if (IS_ERR(mr))
return mr;
}
} else {
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
list_del(&mr->list);
ent->available_mrs--;
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
}
return mr;
}
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
/* Return a MR already available in the cache */
static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_ib_dev *dev = req_ent->dev;
struct mlx5_ib_mr *mr = NULL;
struct mlx5_cache_ent *ent;
int last_umr_cache_entry;
int c;
int i;
struct mlx5_cache_ent *ent = req_ent;
c = order2idx(dev, order);
last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
if (c < 0 || c > last_umr_cache_entry) {
mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
return NULL;
}
for (i = c; i <= last_umr_cache_entry; i++) {
ent = &cache->ent[i];
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
/* Try larger MR pools from the cache to satisfy the allocation */
for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) {
mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
ent - dev->cache.ent);
spin_lock_irq(&ent->lock);
if (!list_empty(&ent->head)) {
mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
list);
list_del(&mr->list);
ent->cur--;
ent->available_mrs--;
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
if (ent->cur < ent->limit)
queue_work(cache->wq, &ent->work);
break;
}
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
queue_work(cache->wq, &ent->work);
}
if (!mr)
cache->ent[c].miss++;
req_ent->miss++;
return mr;
}
static void detach_mr_from_cache(struct mlx5_ib_mr *mr)
{
struct mlx5_cache_ent *ent = mr->cache_ent;
mr->cache_ent = NULL;
spin_lock_irq(&ent->lock);
ent->total_mrs--;
spin_unlock_irq(&ent->lock);
}
void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
int shrink = 0;
int c;
struct mlx5_cache_ent *ent = mr->cache_ent;
if (!mr->allocated_from_cache)
if (!ent)
return;
c = order2idx(dev, mr->order);
WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);
if (mlx5_mr_cache_invalidate(mr)) {
mr->allocated_from_cache = false;
detach_mr_from_cache(mr);
destroy_mkey(dev, mr);
ent = &cache->ent[c];
if (ent->cur < ent->limit)
queue_work(cache->wq, &ent->work);
return;
}
ent = &cache->ent[c];
spin_lock_irq(&ent->lock);
list_add_tail(&mr->list, &ent->head);
ent->cur++;
if (ent->cur > 2 * ent->limit)
shrink = 1;
ent->available_mrs++;
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
if (shrink)
queue_work(cache->wq, &ent->work);
}
static void clean_keys(struct mlx5_ib_dev *dev, int c)
@@ -532,8 +640,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
list_move(&mr->list, &del_list);
ent->cur--;
ent->size--;
ent->available_mrs--;
ent->total_mrs--;
spin_unlock_irq(&ent->lock);
mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
@@ -571,7 +679,7 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
dir = debugfs_create_dir(ent->name, cache->root);
debugfs_create_file("size", 0600, dir, ent, &size_fops);
debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
debugfs_create_u32("cur", 0400, dir, &ent->cur);
debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
debugfs_create_u32("miss", 0600, dir, &ent->miss);
}
}
@@ -580,7 +688,7 @@ static void delay_time_func(struct timer_list *t)
{
struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
dev->fill_delay = 0;
WRITE_ONCE(dev->fill_delay, 0);
}
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
@@ -606,7 +714,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->dev = dev;
ent->limit = 0;
init_completion(&ent->compl);
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
@@ -628,7 +735,9 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
ent->limit = 0;
queue_work(cache->wq, &ent->work);
spin_lock_irq(&ent->lock);
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
}
mlx5_mr_cache_debugfs_init(dev);
@@ -638,13 +747,20 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
int i;
unsigned int i;
if (!dev->cache.wq)
return 0;
dev->cache.stopped = 1;
flush_workqueue(dev->cache.wq);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
struct mlx5_cache_ent *ent = &dev->cache.ent[i];
spin_lock_irq(&ent->lock);
ent->disabled = true;
spin_unlock_irq(&ent->lock);
cancel_work_sync(&ent->work);
cancel_delayed_work_sync(&ent->dwork);
}
mlx5_mr_cache_debugfs_cleanup(dev);
mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
@@ -685,7 +801,6 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_mr *mr;
void *mkc;
u32 *in;
@@ -707,7 +822,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
MLX5_SET(mkc, mkc, length64, 1);
set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);
err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
if (err)
goto err_in;
@@ -840,31 +955,37 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
return err;
}
static struct mlx5_ib_mr *alloc_mr_from_cache(
struct ib_pd *pd, struct ib_umem *umem,
u64 virt_addr, u64 len, int npages,
int page_shift, int order, int access_flags)
static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
unsigned int order)
{
struct mlx5_mr_cache *cache = &dev->cache;
if (order < cache->ent[0].order)
return &cache->ent[0];
order = order - cache->ent[0].order;
if (order > MR_CACHE_LAST_STD_ENTRY)
return NULL;
return &cache->ent[order];
}
static struct mlx5_ib_mr *
alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
u64 len, int npages, int page_shift, unsigned int order,
int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
struct mlx5_ib_mr *mr;
int err = 0;
int i;
for (i = 0; i < 1; i++) {
mr = alloc_cached_mr(dev, order);
if (mr)
break;
err = add_keys(dev, order2idx(dev, order), 1);
if (err && err != -EAGAIN) {
mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
break;
}
if (!ent)
return ERR_PTR(-E2BIG);
mr = get_cache_mr(ent);
if (!mr) {
mr = create_cache_mr(ent);
if (IS_ERR(mr))
return mr;
}
if (!mr)
return ERR_PTR(-EAGAIN);
mr->ibmr.pd = pd;
mr->umem = umem;
mr->access_flags = access_flags;
@@ -1097,7 +1218,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
get_octo_len(virt_addr, length, page_shift));
}
err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
if (err) {
mlx5_ib_warn(dev, "create mkey failed\n");
goto err_2;
@@ -1137,7 +1258,6 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_mr *mr;
void *mkc;
u32 *in;
@@ -1160,7 +1280,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
MLX5_SET64(mkc, mkc, len, length);
set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
if (err)
goto err_in;
@@ -1439,10 +1559,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
/*
* UMR can't be used - MKey needs to be replaced.
*/
if (mr->allocated_from_cache)
err = mlx5_mr_cache_invalidate(mr);
else
err = destroy_mkey(dev, mr);
if (mr->cache_ent)
detach_mr_from_cache(mr);
err = destroy_mkey(dev, mr);
if (err)
goto err;
@@ -1454,8 +1573,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
mr = to_mmr(ib_mr);
goto err;
}
mr->allocated_from_cache = false;
} else {
/*
* Send a UMR WQE
@@ -1542,8 +1659,6 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
int allocated_from_cache = mr->allocated_from_cache;
if (mr->sig) {
if (mlx5_core_destroy_psv(dev->mdev,
mr->sig->psv_memory.psv_idx))
@@ -1558,7 +1673,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
mr->sig = NULL;
}
if (!allocated_from_cache) {
if (!mr->cache_ent) {
destroy_mkey(dev, mr);
mlx5_free_priv_descs(mr);
}
@@ -1575,7 +1690,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
else
clean_mr(dev, mr);
if (mr->allocated_from_cache)
if (mr->cache_ent)
mlx5_mr_cache_free(dev, mr);
else
kfree(mr);
@@ -1638,7 +1753,7 @@ static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
if (err)
goto err_free_descs;
@@ -1905,7 +2020,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
MLX5_SET(mkc, mkc, qpn, 0xffffff);
err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
if (err)
goto free;

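Taken together, the mr.c changes above make regular MR registration follow a try-the-cache-then-create-synchronously path. A condensed sketch of that flow, reusing the function names from the hunks above (illustrative pseudocode only, error handling elided):

/*
 * Sketch only: the allocation path used by alloc_mr_from_cache().
 * get_cache_mr() pops an already-free MR from the requested bucket,
 * trying larger standard buckets if needed; create_cache_mr() builds
 * one synchronously when the cache has nothing available.
 */
static struct mlx5_ib_mr *cache_alloc_sketch(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_mr *mr;

	mr = get_cache_mr(ent);
	if (!mr)
		mr = create_cache_mr(ent);	/* may return ERR_PTR() */
	return mr;
}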

@@ -197,7 +197,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
odp->private = NULL;
mutex_unlock(&odp->umem_mutex);
if (!mr->allocated_from_cache) {
if (!mr->cache_ent) {
mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey);
WARN_ON(mr->descs);
}


@@ -42,7 +42,7 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size);
MLX5_SET(encryption_key_obj, obj, key_type,
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK);
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS);
MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,


@@ -1282,7 +1282,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
mutex_init(&priv->alloc_mutex);
mutex_init(&priv->pgdir_mutex);
INIT_LIST_HEAD(&priv->pgdir_list);
spin_lock_init(&priv->mkey_lock);
priv->dbg_root = debugfs_create_dir(dev_name(dev->device),
mlx5_debugfs_root);


@@ -36,12 +36,9 @@
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
struct mlx5_core_mkey *mkey,
struct mlx5_async_ctx *async_ctx, u32 *in,
int inlen, u32 *out, int outlen,
mlx5_async_cbk_t callback,
struct mlx5_async_work *context)
int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
struct mlx5_core_mkey *mkey,
u32 *in, int inlen)
{
u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
u32 mkey_index;
@@ -49,41 +46,23 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
int err;
u8 key;
spin_lock_irq(&dev->priv.mkey_lock);
key = dev->priv.mkey_key++;
spin_unlock_irq(&dev->priv.mkey_lock);
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
MLX5_SET(mkc, mkc, mkey_7_0, key);
if (callback)
return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
callback, context);
err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
if (err)
return err;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
mkey->size = MLX5_GET64(mkc, mkc, len);
mkey->key = mlx5_idx_to_mkey(mkey_index) | key;
mkey->key |= mlx5_idx_to_mkey(mkey_index);
mkey->pd = MLX5_GET(mkc, mkc, pd);
mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
mkey_index, key, mkey->key);
return 0;
}
EXPORT_SYMBOL(mlx5_core_create_mkey_cb);
int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
struct mlx5_core_mkey *mkey,
u32 *in, int inlen)
{
return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen,
NULL, 0, NULL, NULL);
}
EXPORT_SYMBOL(mlx5_core_create_mkey);
int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,


@@ -575,10 +575,6 @@ struct mlx5_priv {
/* end: alloc staff */
struct dentry *dbg_root;
/* protect mkey key part */
spinlock_t mkey_lock;
u8 mkey_key;
struct list_head dev_list;
struct list_head ctx_list;
spinlock_t ctx_lock;
@@ -947,12 +943,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
gfp_t flags, int npages);
void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
struct mlx5_cmd_mailbox *head);
int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
struct mlx5_core_mkey *mkey,
struct mlx5_async_ctx *async_ctx, u32 *in,
int inlen, u32 *out, int outlen,
mlx5_async_cbk_t callback,
struct mlx5_async_work *context);
int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
struct mlx5_core_mkey *mkey,
u32 *in, int inlen);


@@ -414,7 +414,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 reserved_at_16[0x1];
u8 table_miss_action_domain[0x1];
u8 termination_table[0x1];
u8 reserved_at_19[0x7];
u8 reformat_and_fwd_to_table[0x1];
u8 reserved_at_1a[0x6];
u8 reserved_at_20[0x2];
u8 log_max_ft_size[0x6];
u8 log_max_modify_header_context[0x8];
@@ -741,7 +742,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits {
u8 flow_source[0x1];
u8 reserved_at_18[0x2];
u8 multi_fdb_encap[0x1];
u8 reserved_at_1b[0x1];
u8 egress_acl_forward_to_vport[0x1];
u8 fdb_multi_path_to_table[0x1];
u8 reserved_at_1d[0x3];
@@ -8430,7 +8431,8 @@ struct mlx5_ifc_ptys_reg_bits {
u8 proto_mask[0x3];
u8 an_status[0x4];
u8 reserved_at_24[0x1c];
u8 reserved_at_24[0xc];
u8 data_rate_oper[0x10];
u8 ext_eth_proto_capability[0x20];
@@ -10496,7 +10498,8 @@ enum {
};
enum {
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK = 0x1,
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS = 0x1,
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC = 0x2,
};
struct mlx5_ifc_tls_static_params_bits {