diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c index a92eb172f0e7..4ceced5cefcf 100644 --- a/drivers/nvdimm/virtio_pmem.c +++ b/drivers/nvdimm/virtio_pmem.c @@ -29,12 +29,27 @@ static int init_vq(struct virtio_pmem *vpmem) return 0; }; +static int virtio_pmem_validate(struct virtio_device *vdev) +{ + struct virtio_shm_region shm_reg; + + if (virtio_has_feature(vdev, VIRTIO_PMEM_F_SHMEM_REGION) && + !virtio_get_shm_region(vdev, &shm_reg, (u8)VIRTIO_PMEM_SHMEM_REGION_ID) + ) { + dev_notice(&vdev->dev, "failed to get shared memory region %d\n", + VIRTIO_PMEM_SHMEM_REGION_ID); + __virtio_clear_bit(vdev, VIRTIO_PMEM_F_SHMEM_REGION); + } + return 0; +} + static int virtio_pmem_probe(struct virtio_device *vdev) { struct nd_region_desc ndr_desc = {}; struct nd_region *nd_region; struct virtio_pmem *vpmem; struct resource res; + struct virtio_shm_region shm_reg; int err = 0; if (!vdev->config->get) { @@ -57,10 +72,16 @@ static int virtio_pmem_probe(struct virtio_device *vdev) goto out_err; } - virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, - start, &vpmem->start); - virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, - size, &vpmem->size); + if (virtio_has_feature(vdev, VIRTIO_PMEM_F_SHMEM_REGION)) { + virtio_get_shm_region(vdev, &shm_reg, (u8)VIRTIO_PMEM_SHMEM_REGION_ID); + vpmem->start = shm_reg.addr; + vpmem->size = shm_reg.len; + } else { + virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, + start, &vpmem->start); + virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, + size, &vpmem->size); + } res.start = vpmem->start; res.end = vpmem->start + vpmem->size - 1; @@ -122,10 +143,17 @@ static void virtio_pmem_remove(struct virtio_device *vdev) virtio_reset_device(vdev); } +static unsigned int features[] = { + VIRTIO_PMEM_F_SHMEM_REGION, +}; + static struct virtio_driver virtio_pmem_driver = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, + .validate = virtio_pmem_validate, .probe = virtio_pmem_probe, .remove = virtio_pmem_remove, }; diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 9d1bdcdc1331..4cf20be668a6 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -37,6 +37,11 @@ #define VIRTIO_SCSI_EVENT_LEN 8 #define VIRTIO_SCSI_VQ_BASE 2 +static unsigned int virtscsi_poll_queues; +module_param(virtscsi_poll_queues, uint, 0644); +MODULE_PARM_DESC(virtscsi_poll_queues, + "The number of dedicated virtqueues for polling I/O"); + /* Command queue element */ struct virtio_scsi_cmd { struct scsi_cmnd *sc; @@ -76,6 +81,7 @@ struct virtio_scsi { struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN]; u32 num_queues; + int io_queues[HCTX_MAX_TYPES]; struct hlist_node node; @@ -722,9 +728,49 @@ static int virtscsi_abort(struct scsi_cmnd *sc) static void virtscsi_map_queues(struct Scsi_Host *shost) { struct virtio_scsi *vscsi = shost_priv(shost); - struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; + int i, qoff; - blk_mq_virtio_map_queues(qmap, vscsi->vdev, 2); + for (i = 0, qoff = 0; i < shost->nr_maps; i++) { + struct blk_mq_queue_map *map = &shost->tag_set.map[i]; + + map->nr_queues = vscsi->io_queues[i]; + map->queue_offset = qoff; + qoff += map->nr_queues; + + if (map->nr_queues == 0) + continue; + + /* + * Regular queues have interrupts and hence CPU affinity is + * defined by the core virtio code, but polling queues have + * no interrupts so we let the block layer 
assign CPU affinity. + */ + if (i == HCTX_TYPE_POLL) + blk_mq_map_queues(map); + else + blk_mq_virtio_map_queues(map, vscsi->vdev, 2); + } +} + +static int virtscsi_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) +{ + struct virtio_scsi *vscsi = shost_priv(shost); + struct virtio_scsi_vq *virtscsi_vq = &vscsi->req_vqs[queue_num]; + unsigned long flags; + unsigned int len; + int found = 0; + void *buf; + + spin_lock_irqsave(&virtscsi_vq->vq_lock, flags); + + while ((buf = virtqueue_get_buf(virtscsi_vq->vq, &len)) != NULL) { + virtscsi_complete_cmd(vscsi, buf); + found++; + } + + spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags); + + return found; } static void virtscsi_commit_rqs(struct Scsi_Host *shost, u16 hwq) @@ -751,6 +797,7 @@ static const struct scsi_host_template virtscsi_host_template = { .this_id = -1, .cmd_size = sizeof(struct virtio_scsi_cmd), .queuecommand = virtscsi_queuecommand, + .mq_poll = virtscsi_mq_poll, .commit_rqs = virtscsi_commit_rqs, .change_queue_depth = virtscsi_change_queue_depth, .eh_abort_handler = virtscsi_abort, @@ -795,13 +842,14 @@ static int virtscsi_init(struct virtio_device *vdev, { int err; u32 i; - u32 num_vqs; + u32 num_vqs, num_poll_vqs, num_req_vqs; vq_callback_t **callbacks; const char **names; struct virtqueue **vqs; struct irq_affinity desc = { .pre_vectors = 2 }; - num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE; + num_req_vqs = vscsi->num_queues; + num_vqs = num_req_vqs + VIRTIO_SCSI_VQ_BASE; vqs = kmalloc_array(num_vqs, sizeof(struct virtqueue *), GFP_KERNEL); callbacks = kmalloc_array(num_vqs, sizeof(vq_callback_t *), GFP_KERNEL); @@ -812,15 +860,31 @@ static int virtscsi_init(struct virtio_device *vdev, goto out; } + num_poll_vqs = min_t(unsigned int, virtscsi_poll_queues, + num_req_vqs - 1); + vscsi->io_queues[HCTX_TYPE_DEFAULT] = num_req_vqs - num_poll_vqs; + vscsi->io_queues[HCTX_TYPE_READ] = 0; + vscsi->io_queues[HCTX_TYPE_POLL] = num_poll_vqs; + + dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n", + vscsi->io_queues[HCTX_TYPE_DEFAULT], + vscsi->io_queues[HCTX_TYPE_READ], + vscsi->io_queues[HCTX_TYPE_POLL]); + callbacks[0] = virtscsi_ctrl_done; callbacks[1] = virtscsi_event_done; names[0] = "control"; names[1] = "event"; - for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs; i++) { + for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs - num_poll_vqs; i++) { callbacks[i] = virtscsi_req_done; names[i] = "request"; } + for (; i < num_vqs; i++) { + callbacks[i] = NULL; + names[i] = "request_poll"; + } + /* Discover virtqueues and write information to configuration. 
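 	 * The poll queues set up just above have a NULL callback, so no
 	 * interrupt handler is wired up for them; their completions are
 	 * reaped by the block layer through virtscsi_mq_poll() instead.
 	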
*/ err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); if (err) @@ -874,6 +938,7 @@ static int virtscsi_probe(struct virtio_device *vdev) sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1; shost->sg_tablesize = sg_elems; + shost->nr_maps = 1; vscsi = shost_priv(shost); vscsi->vdev = vdev; vscsi->num_queues = num_queues; @@ -883,6 +948,9 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; + if (vscsi->io_queues[HCTX_TYPE_POLL]) + shost->nr_maps = HCTX_TYPE_POLL + 1; + shost->can_queue = virtqueue_get_vring_size(vscsi->req_vqs[0].vq); cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c index 5a09a09cca70..cce3d1837104 100644 --- a/drivers/vdpa/alibaba/eni_vdpa.c +++ b/drivers/vdpa/alibaba/eni_vdpa.c @@ -497,7 +497,7 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!eni_vdpa->vring) { ret = -ENOMEM; ENI_ERR(pdev, "failed to allocate virtqueues\n"); - goto err; + goto err_remove_vp_legacy; } for (i = 0; i < eni_vdpa->queues; i++) { @@ -509,11 +509,13 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = vdpa_register_device(&eni_vdpa->vdpa, eni_vdpa->queues); if (ret) { ENI_ERR(pdev, "failed to register to vdpa bus\n"); - goto err; + goto err_remove_vp_legacy; } return 0; +err_remove_vp_legacy: + vp_legacy_remove(&eni_vdpa->ldev); err: put_device(&eni_vdpa->vdpa.dev); return ret; diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 84547d998bcf..50aac8fe57ef 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -35,6 +35,9 @@ struct mlx5_vdpa_mr { struct vhost_iotlb *iotlb; bool user_mr; + + refcount_t refcount; + struct list_head mr_list; }; struct mlx5_vdpa_resources { @@ -93,6 +96,7 @@ struct mlx5_vdpa_dev { u32 generation; struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS]; + struct list_head mr_list_head; /* serialize mr access */ struct mutex mr_mtx; struct mlx5_control_vq cvq; @@ -118,8 +122,10 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey); struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb); void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev); -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, - struct mlx5_vdpa_mr *mr); +void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr); +void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr); void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr, unsigned int asid); diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 2197c46e563a..4758914ccf86 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -498,32 +498,54 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { + if (WARN_ON(!mr)) + return; + if (mr->user_mr) destroy_user_mr(mvdev, mr); else destroy_dma_mr(mvdev, mr); vhost_iotlb_free(mr->iotlb); + + list_del(&mr->mr_list); + + kfree(mr); } -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, - struct mlx5_vdpa_mr *mr) +static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) { if (!mr) return; + if (refcount_dec_and_test(&mr->refcount)) + _mlx5_vdpa_destroy_mr(mvdev, mr); +} + +void mlx5_vdpa_put_mr(struct 
mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) +{ mutex_lock(&mvdev->mr_mtx); - - _mlx5_vdpa_destroy_mr(mvdev, mr); - - for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) { - if (mvdev->mr[i] == mr) - mvdev->mr[i] = NULL; - } - + _mlx5_vdpa_put_mr(mvdev, mr); mutex_unlock(&mvdev->mr_mtx); +} - kfree(mr); +static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) +{ + if (!mr) + return; + + refcount_inc(&mr->refcount); +} + +void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) +{ + mutex_lock(&mvdev->mr_mtx); + _mlx5_vdpa_get_mr(mvdev, mr); + mutex_unlock(&mvdev->mr_mtx); } void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, @@ -534,10 +556,23 @@ void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, mutex_lock(&mvdev->mr_mtx); + _mlx5_vdpa_put_mr(mvdev, old_mr); mvdev->mr[asid] = new_mr; - if (old_mr) { - _mlx5_vdpa_destroy_mr(mvdev, old_mr); - kfree(old_mr); + + mutex_unlock(&mvdev->mr_mtx); +} + +static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr *mr; + + mutex_lock(&mvdev->mr_mtx); + + list_for_each_entry(mr, &mvdev->mr_list_head, mr_list) { + + mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: " + "mr: %p, mkey: 0x%x, refcount: %u\n", + mr, mr->mkey, refcount_read(&mr->refcount)); } mutex_unlock(&mvdev->mr_mtx); @@ -547,9 +582,11 @@ void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev) { for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) - mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[i]); + mlx5_vdpa_update_mr(mvdev, NULL, i); prune_iotlb(mvdev->cvq.iotlb); + + mlx5_vdpa_show_mr_leaks(mvdev); } static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, @@ -576,6 +613,8 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, if (err) goto err_iotlb; + list_add_tail(&mr->mr_list, &mvdev->mr_list_head); + return 0; err_iotlb: @@ -607,6 +646,8 @@ struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, if (err) goto out_err; + refcount_set(&mr->refcount, 1); + return mr; out_err: @@ -651,7 +692,7 @@ int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) if (asid >= MLX5_VDPA_NUM_AS) return -EINVAL; - mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[asid]); + mlx5_vdpa_update_mr(mvdev, NULL, asid); if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { if (mlx5_vdpa_create_dma_mr(mvdev)) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 26ba7da6b410..778821bab7d9 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -120,6 +120,12 @@ struct mlx5_vdpa_virtqueue { u16 avail_idx; u16 used_idx; int fw_state; + + u64 modified_fields; + + struct mlx5_vdpa_mr *vq_mr; + struct mlx5_vdpa_mr *desc_mr; + struct msi_map map; /* keep last in the struct */ @@ -943,6 +949,14 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque kfree(in); mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + mlx5_vdpa_get_mr(mvdev, vq_mr); + mvq->vq_mr = vq_mr; + + if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) { + mlx5_vdpa_get_mr(mvdev, vq_desc_mr); + mvq->desc_mr = vq_desc_mr; + } + return 0; err_cmd: @@ -969,6 +983,12 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq } mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; umems_destroy(ndev, mvq); + + mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr); + mvq->vq_mr = NULL; + + mlx5_vdpa_put_mr(&ndev->mvdev, 
mvq->desc_mr); + mvq->desc_mr = NULL; } static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) @@ -1167,7 +1187,12 @@ err_cmd: return err; } -static bool is_valid_state_change(int oldstate, int newstate) +static bool is_resumable(struct mlx5_vdpa_net *ndev) +{ + return ndev->mvdev.vdev.config->resume; +} + +static bool is_valid_state_change(int oldstate, int newstate, bool resumable) { switch (oldstate) { case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: @@ -1175,25 +1200,43 @@ static bool is_valid_state_change(int oldstate, int newstate) case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: + return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false; case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR: default: return false; } } -static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) +static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq) +{ + /* Only state is always modifiable */ + if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE) + return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT || + mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; + + return true; +} + +static int modify_virtqueue(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + int state) { int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_vdpa_mr *desc_mr = NULL; + struct mlx5_vdpa_mr *vq_mr = NULL; + bool state_change = false; void *obj_context; void *cmd_hdr; + void *vq_ctx; void *in; int err; if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE) return 0; - if (!is_valid_state_change(mvq->fw_state, state)) + if (!modifiable_virtqueue_fields(mvq)) return -EINVAL; in = kzalloc(inlen, GFP_KERNEL); @@ -1208,17 +1251,83 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); - MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, - MLX5_VIRTQ_MODIFY_MASK_STATE); - MLX5_SET(virtio_net_q_object, obj_context, state, state); + vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) { + if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) { + err = -EINVAL; + goto done; + } + + MLX5_SET(virtio_net_q_object, obj_context, state, state); + state_change = true; + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) { + MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); + MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); + MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX) + MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX) + MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { + vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + + if (vq_mr) + MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); + else + mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY; + } + + if (mvq->modified_fields & 
MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { + desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + + if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) + MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey); + else + mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; + } + + MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields); err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); - kfree(in); - if (!err) + if (err) + goto done; + + if (state_change) mvq->fw_state = state; + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { + mlx5_vdpa_put_mr(mvdev, mvq->vq_mr); + mlx5_vdpa_get_mr(mvdev, vq_mr); + mvq->vq_mr = vq_mr; + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { + mlx5_vdpa_put_mr(mvdev, mvq->desc_mr); + mlx5_vdpa_get_mr(mvdev, desc_mr); + mvq->desc_mr = desc_mr; + } + + mvq->modified_fields = 0; + +done: + kfree(in); return err; } +static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + unsigned int state) +{ + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE; + return modify_virtqueue(ndev, mvq, state); +} + static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {}; @@ -1347,7 +1456,7 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) goto err_vq; if (mvq->ready) { - err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); + err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); if (err) { mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", idx, err); @@ -1382,7 +1491,7 @@ static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) return; - if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) + if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); if (query_virtqueue(ndev, mvq, &attr)) { @@ -1401,12 +1510,31 @@ static void suspend_vqs(struct mlx5_vdpa_net *ndev) suspend_vq(ndev, &ndev->vqs[i]); } +static void resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + if (!mvq->initialized || !is_resumable(ndev)) + return; + + if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND) + return; + + if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)) + mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u\n", mvq->index); +} + +static void resume_vqs(struct mlx5_vdpa_net *ndev) +{ + for (int i = 0; i < ndev->mvdev.max_vqs; i++) + resume_vq(ndev, &ndev->vqs[i]); +} + static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { if (!mvq->initialized) return; suspend_vq(ndev, mvq); + mvq->modified_fields = 0; destroy_virtqueue(ndev, mvq); dealloc_vector(ndev, mvq); counter_set_dealloc(ndev, mvq); @@ -2138,6 +2266,7 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_ mvq->desc_addr = desc_area; mvq->device_addr = device_area; mvq->driver_addr = driver_area; + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS; return 0; } @@ -2207,7 +2336,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready if (!ready) { suspend_vq(ndev, mvq); } else { - err = modify_virtqueue(ndev, mvq, 
MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); + err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); if (err) { mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err); ready = false; @@ -2255,6 +2384,8 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, mvq->used_idx = state->split.avail_index; mvq->avail_idx = state->split.avail_index; + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX | + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX; return 0; } @@ -2703,24 +2834,35 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, unsigned int asid) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + bool teardown = !is_resumable(ndev); int err; suspend_vqs(ndev); - err = save_channels_info(ndev); - if (err) - return err; + if (teardown) { + err = save_channels_info(ndev); + if (err) + return err; - teardown_driver(ndev); + teardown_driver(ndev); + } mlx5_vdpa_update_mr(mvdev, new_mr, asid); + for (int i = 0; i < ndev->cur_num_vqs; i++) + ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY | + MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; + if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) return 0; - restore_channels_info(ndev); - err = setup_driver(mvdev); - if (err) - return err; + if (teardown) { + restore_channels_info(ndev); + err = setup_driver(mvdev); + if (err) + return err; + } + + resume_vqs(ndev); return 0; } @@ -2804,8 +2946,10 @@ static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) { int i; - for (i = 0; i < ndev->mvdev.max_vqs; i++) + for (i = 0; i < ndev->mvdev.max_vqs; i++) { ndev->vqs[i].ready = false; + ndev->vqs[i].modified_fields = 0; + } ndev->mvdev.cvq.ready = false; } @@ -2982,7 +3126,7 @@ static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid); out_err: - mlx5_vdpa_destroy_mr(mvdev, new_mr); + mlx5_vdpa_put_mr(mvdev, new_mr); return err; } @@ -3229,6 +3373,23 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev) return 0; } +static int mlx5_vdpa_resume(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev; + + ndev = to_mlx5_vdpa_ndev(mvdev); + + mlx5_vdpa_info(mvdev, "resuming device\n"); + + down_write(&ndev->reslock); + mvdev->suspended = false; + resume_vqs(ndev); + register_link_notifier(ndev); + up_write(&ndev->reslock); + return 0; +} + static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, unsigned int asid) { @@ -3285,6 +3446,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_vq_dma_dev = mlx5_get_vq_dma_dev, .free = mlx5_vdpa_free, .suspend = mlx5_vdpa_suspend, + .resume = mlx5_vdpa_resume, /* Op disabled if not supported. 
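+				     * mlx5v_probe() clears this op when the
+				     * device lacks freeze_to_rdy_supported.
+				     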
*/ }; static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) @@ -3560,6 +3722,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, if (err) goto err_mpfs; + INIT_LIST_HEAD(&mvdev->mr_list_head); + if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { err = mlx5_vdpa_create_dma_mr(mvdev); if (err) @@ -3656,6 +3820,9 @@ static int mlx5v_probe(struct auxiliary_device *adev, if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported)) mgtdev->vdpa_ops.get_vq_desc_group = NULL; + if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported)) + mgtdev->vdpa_ops.resume = NULL; + err = vdpa_mgmtdev_register(&mgtdev->mgtdev); if (err) goto reg_err; diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index a7612e0783b3..d0695680b282 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -131,7 +131,7 @@ static void vdpa_release_dev(struct device *d) if (ops->free) ops->free(vdev); - ida_simple_remove(&vdpa_index_ida, vdev->index); + ida_free(&vdpa_index_ida, vdev->index); kfree(vdev->driver_override); kfree(vdev); } @@ -205,7 +205,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, return vdev; err_name: - ida_simple_remove(&vdpa_index_ida, vdev->index); + ida_free(&vdpa_index_ida, vdev->index); err_ida: kfree(vdev); err: diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 173beda74b38..bc4a51e4638b 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -59,6 +59,7 @@ struct vhost_vdpa { int in_batch; struct vdpa_iova_range range; u32 batch_asid; + bool suspended; }; static DEFINE_IDA(vhost_vdpa_ida); @@ -232,6 +233,8 @@ static int _compat_vdpa_reset(struct vhost_vdpa *v) struct vdpa_device *vdpa = v->vdpa; u32 flags = 0; + v->suspended = false; + if (v->vdev.vqs) { flags |= !vhost_backend_has_feature(v->vdev.vqs[0], VHOST_BACKEND_F_IOTLB_PERSIST) ? 
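The suspended flag tracked above feeds the VHOST_VDPA_SUSPEND/RESUME handlers in the next hunks. A minimal sketch of the userspace sequence this enables, assuming a vhost-vdpa device fd and the uapi definitions from <linux/vhost.h>; error handling is omitted and the helper name is illustrative only:

#include <sys/ioctl.h>
#include <linux/vhost.h>

/* Suspend the device, then snapshot a ring. With the checks added in the
 * following hunks, vring state may only be changed after DRIVER_OK when a
 * suspend has succeeded first. */
static int snapshot_vring_base(int vhost_fd, struct vhost_vring_state *state)
{
	if (ioctl(vhost_fd, VHOST_VDPA_SUSPEND))
		return -1;
	return ioctl(vhost_fd, VHOST_GET_VRING_BASE, state);
}
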
@@ -590,11 +593,16 @@ static long vhost_vdpa_suspend(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + int ret; if (!ops->suspend) return -EOPNOTSUPP; - return ops->suspend(vdpa); + ret = ops->suspend(vdpa); + if (!ret) + v->suspended = true; + + return ret; } /* After a successful return of this ioctl the device resumes processing @@ -605,11 +613,16 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + int ret; if (!ops->resume) return -EOPNOTSUPP; - return ops->resume(vdpa); + ret = ops->resume(vdpa); + if (!ret) + v->suspended = false; + + return ret; } static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, @@ -690,6 +703,9 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, switch (cmd) { case VHOST_SET_VRING_ADDR: + if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended) + return -EINVAL; + if (ops->set_vq_address(vdpa, idx, (u64)(uintptr_t)vq->desc, (u64)(uintptr_t)vq->avail, @@ -698,6 +714,9 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, break; case VHOST_SET_VRING_BASE: + if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended) + return -EINVAL; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff; vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000); @@ -968,7 +987,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, r = ops->set_map(vdpa, asid, iotlb); } else { r = iommu_map(v->domain, iova, pa, size, - perm_to_iommu_flags(perm), GFP_KERNEL); + perm_to_iommu_flags(perm), + GFP_KERNEL_ACCOUNT); } if (r) { vhost_iotlb_del_range(iotlb, iova, iova + size - 1); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 59cdc0292dce..1f5b3dd31fcf 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -119,6 +119,11 @@ struct virtio_balloon { /* Free page reporting device */ struct virtqueue *reporting_vq; struct page_reporting_dev_info pr_dev_info; + + /* State for keeping the wakeup_source active while adjusting the balloon */ + spinlock_t adjustment_lock; + bool adjustment_signal_pending; + bool adjustment_in_progress; }; static const struct virtio_device_id id_table[] = { @@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) queue_work(vb->balloon_wq, &vb->report_free_page_work); } +static void start_update_balloon_size(struct virtio_balloon *vb) +{ + unsigned long flags; + + spin_lock_irqsave(&vb->adjustment_lock, flags); + vb->adjustment_signal_pending = true; + if (!vb->adjustment_in_progress) { + vb->adjustment_in_progress = true; + pm_stay_awake(vb->vdev->dev.parent); + } + spin_unlock_irqrestore(&vb->adjustment_lock, flags); + + queue_work(system_freezable_wq, &vb->update_balloon_size_work); +} + +static void end_update_balloon_size(struct virtio_balloon *vb) +{ + spin_lock_irq(&vb->adjustment_lock); + if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { + vb->adjustment_in_progress = false; + pm_relax(vb->vdev->dev.parent); + } + spin_unlock_irq(&vb->adjustment_lock); +} + static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; @@ -444,8 +474,7 @@ static void virtballoon_changed(struct virtio_device *vdev) spin_lock_irqsave(&vb->stop_update_lock, flags); if 
(!vb->stop_update) { - queue_work(system_freezable_wq, - &vb->update_balloon_size_work); + start_update_balloon_size(vb); virtio_balloon_queue_free_page_work(vb); } spin_unlock_irqrestore(&vb->stop_update_lock, flags); @@ -476,19 +505,25 @@ static void update_balloon_size_func(struct work_struct *work) vb = container_of(work, struct virtio_balloon, update_balloon_size_work); + + spin_lock_irq(&vb->adjustment_lock); + vb->adjustment_signal_pending = false; + spin_unlock_irq(&vb->adjustment_lock); + diff = towards_target(vb); - if (!diff) - return; - - if (diff > 0) - diff -= fill_balloon(vb, diff); - else - diff += leak_balloon(vb, -diff); - update_balloon_size(vb); + if (diff) { + if (diff > 0) + diff -= fill_balloon(vb, diff); + else + diff += leak_balloon(vb, -diff); + update_balloon_size(vb); + } if (diff) queue_work(system_freezable_wq, work); + else + end_update_balloon_size(vb); } static int init_vqs(struct virtio_balloon *vb) @@ -992,6 +1027,8 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_unregister_oom; } + spin_lock_init(&vb->adjustment_lock); + virtio_device_ready(vdev); if (towards_target(vb)) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 9f93330fc2cc..b655fccaf773 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -495,8 +495,40 @@ static int virtio_pci_restore(struct device *dev) return virtio_device_restore(&vp_dev->vdev); } +static bool vp_supports_pm_no_reset(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + u16 pmcsr; + + if (!pci_dev->pm_cap) + return false; + + pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr); + if (PCI_POSSIBLE_ERROR(pmcsr)) { + dev_err(dev, "Unable to query pmcsr"); + return false; + } + + return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET; +} + +static int virtio_pci_suspend(struct device *dev) +{ + return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev); +} + +static int virtio_pci_resume(struct device *dev) +{ + return vp_supports_pm_no_reset(dev) ? 
0 : virtio_pci_restore(dev);
+}
+
 static const struct dev_pm_ops virtio_pci_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
+	.suspend = virtio_pci_suspend,
+	.resume = virtio_pci_resume,
+	.freeze = virtio_pci_freeze,
+	.thaw = virtio_pci_restore,
+	.poweroff = virtio_pci_freeze,
+	.restore = virtio_pci_restore,
 };
 #endif
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 7235f3eaed8b..bf5320b28b8b 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1241,7 +1241,8 @@ struct mlx5_ifc_virtio_emulation_cap_bits {
 	u8 reserved_at_c0[0x13];
 	u8 desc_group_mkey_supported[0x1];
-	u8 reserved_at_d4[0xc];
+	u8 freeze_to_rdy_supported[0x1];
+	u8 reserved_at_d5[0xb];
 
 	u8 reserved_at_e0[0x20];
 
diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
index b86d51a855f6..40371c916cf9 100644
--- a/include/linux/mlx5/mlx5_ifc_vdpa.h
+++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
@@ -145,6 +145,10 @@ enum {
 	MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0,
 	MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3,
 	MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4,
+	MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS = (u64)1 << 6,
+	MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX = (u64)1 << 7,
+	MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX = (u64)1 << 8,
+	MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY = (u64)1 << 11,
 	MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY = (u64)1 << 14,
 };
 
diff --git a/include/uapi/linux/virtio_pmem.h b/include/uapi/linux/virtio_pmem.h
index d676b3620383..ede4f3564977 100644
--- a/include/uapi/linux/virtio_pmem.h
+++ b/include/uapi/linux/virtio_pmem.h
@@ -14,6 +14,13 @@
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
+/* Feature bits */
+/* guest physical address range will be indicated as shared memory region 0 */
+#define VIRTIO_PMEM_F_SHMEM_REGION 0
+
+/* shmid of the shared memory region corresponding to the pmem */
+#define VIRTIO_PMEM_SHMEM_REGION_ID 0
+
 struct virtio_pmem_config {
 	__le64 start;
 	__le64 size;
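
Taken together, the feature bit and shmid above drive the probe-time negotiation at the top of this series. A condensed sketch of that flow, assuming the driver's existing virtio includes; the helper name pmem_get_range() is illustrative, not part of the patch:

static void pmem_get_range(struct virtio_device *vdev, u64 *start, u64 *size)
{
	struct virtio_shm_region shm_reg;

	if (virtio_has_feature(vdev, VIRTIO_PMEM_F_SHMEM_REGION)) {
		/* validate() already cleared the bit if the region is absent */
		virtio_get_shm_region(vdev, &shm_reg,
				      (u8)VIRTIO_PMEM_SHMEM_REGION_ID);
		*start = shm_reg.addr;
		*size = shm_reg.len;
	} else {
		/* older devices expose the range through config space */
		virtio_cread_le(vdev, struct virtio_pmem_config, start, start);
		virtio_cread_le(vdev, struct virtio_pmem_config, size, size);
	}
}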