vfio/mlx5: Create and destroy page tracker object

Add support for creating and destroying page tracker object.

This object is used to control/report the device dirty pages.

As part of creating the tracker need to consider the device capabilities
for max ranges and adapt/combine ranges accordingly.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20220908183448.195262-8-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
Yishai Hadas 2022-09-08 21:34:45 +03:00 committed by Alex Williamson
parent 79c3cf2799
commit c1d050b0d1
2 changed files with 148 additions and 0 deletions

View file

@ -410,6 +410,148 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
return err;
}
static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes,
u32 req_nodes)
{
struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
unsigned long min_gap;
unsigned long curr_gap;
/* Special shortcut when a single range is required */
if (req_nodes == 1) {
unsigned long last;
curr = comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);
while (curr) {
last = curr->last;
prev = curr;
curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
if (prev != comb_start)
interval_tree_remove(prev, root);
}
comb_start->last = last;
return;
}
/* Combine ranges which have the smallest gap */
while (cur_nodes > req_nodes) {
prev = NULL;
min_gap = ULONG_MAX;
curr = interval_tree_iter_first(root, 0, ULONG_MAX);
while (curr) {
if (prev) {
curr_gap = curr->start - prev->last;
if (curr_gap < min_gap) {
min_gap = curr_gap;
comb_start = prev;
comb_end = curr;
}
}
prev = curr;
curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
}
comb_start->last = comb_end->last;
interval_tree_remove(comb_end, root);
cur_nodes--;
}
}
static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
struct mlx5vf_pci_core_device *mvdev,
struct rb_root_cached *ranges, u32 nnodes)
{
int max_num_range =
MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_max_num_range);
struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
int record_size = MLX5_ST_SZ_BYTES(page_track_range);
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
struct interval_tree_node *node = NULL;
u64 total_ranges_len = 0;
u32 num_ranges = nnodes;
u8 log_addr_space_size;
void *range_list_ptr;
void *obj_context;
void *cmd_hdr;
int inlen;
void *in;
int err;
int i;
if (num_ranges > max_num_range) {
combine_ranges(ranges, nnodes, max_num_range);
num_ranges = max_num_range;
}
inlen = MLX5_ST_SZ_BYTES(create_page_track_obj_in) +
record_size * num_ranges;
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
cmd_hdr = MLX5_ADDR_OF(create_page_track_obj_in, in,
general_obj_in_cmd_hdr);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
MLX5_OBJ_TYPE_PAGE_TRACK);
obj_context = MLX5_ADDR_OF(create_page_track_obj_in, in, obj_context);
MLX5_SET(page_track, obj_context, vhca_id, mvdev->vhca_id);
MLX5_SET(page_track, obj_context, track_type, 1);
MLX5_SET(page_track, obj_context, log_page_size,
ilog2(tracker->host_qp->tracked_page_size));
MLX5_SET(page_track, obj_context, log_msg_size,
ilog2(tracker->host_qp->max_msg_size));
MLX5_SET(page_track, obj_context, reporting_qpn, tracker->fw_qp->qpn);
MLX5_SET(page_track, obj_context, num_ranges, num_ranges);
range_list_ptr = MLX5_ADDR_OF(page_track, obj_context, track_range);
node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
for (i = 0; i < num_ranges; i++) {
void *addr_range_i_base = range_list_ptr + record_size * i;
unsigned long length = node->last - node->start;
MLX5_SET64(page_track_range, addr_range_i_base, start_address,
node->start);
MLX5_SET64(page_track_range, addr_range_i_base, length, length);
total_ranges_len += length;
node = interval_tree_iter_next(node, 0, ULONG_MAX);
}
WARN_ON(node);
log_addr_space_size = ilog2(total_ranges_len);
if (log_addr_space_size <
(MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) ||
log_addr_space_size >
(MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_addr_space))) {
err = -EOPNOTSUPP;
goto out;
}
MLX5_SET(page_track, obj_context, log_addr_space_size,
log_addr_space_size);
err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
if (err)
goto out;
tracker->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
out:
kfree(in);
return err;
}
static int mlx5vf_cmd_destroy_tracker(struct mlx5_core_dev *mdev,
u32 tracker_id)
{
u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, tracker_id);
return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}
static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
struct mlx5_vhca_cq_buf *buf, int nent,
int cqe_size)
@ -833,6 +975,7 @@ _mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev)
WARN_ON(mvdev->mdev_detach);
mlx5vf_cmd_destroy_tracker(mdev, tracker->id);
mlx5vf_destroy_qp(mdev, tracker->fw_qp);
mlx5vf_free_qp_recv_resources(mdev, tracker->host_qp);
mlx5vf_destroy_qp(mdev, tracker->host_qp);
@ -941,6 +1084,10 @@ int mlx5vf_start_page_tracker(struct vfio_device *vdev,
tracker->host_qp = host_qp;
tracker->fw_qp = fw_qp;
err = mlx5vf_create_tracker(mdev, mvdev, ranges, nnodes);
if (err)
goto err_activate;
*page_size = host_qp->tracked_page_size;
mvdev->log_active = true;
mlx5vf_state_mutex_unlock(mvdev);

View file

@ -80,6 +80,7 @@ struct mlx5_vhca_qp {
};
struct mlx5_vhca_page_tracker {
u32 id;
u32 pdn;
struct mlx5_uars_page *uar;
struct mlx5_vhca_cq cq;