diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index ae910347b7cf..4a3cfc7940de 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -950,7 +950,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) unsigned long to; bool active = true; - blk_mq_stop_hw_queues(port->dd->queue); + blk_mq_quiesce_queue(port->dd->queue); to = jiffies + msecs_to_jiffies(timeout); do { @@ -970,10 +970,10 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) break; } while (time_before(jiffies, to)); - blk_mq_start_stopped_hw_queues(port->dd->queue, true); + blk_mq_unquiesce_queue(port->dd->queue); return active ? -EBUSY : 0; err_fault: - blk_mq_start_stopped_hw_queues(port->dd->queue, true); + blk_mq_unquiesce_queue(port->dd->queue); return -EFAULT; } @@ -2737,6 +2737,9 @@ static void mtip_abort_cmd(struct request *req, void *data, struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); struct driver_data *dd = data; + if (!blk_mq_request_started(req)) + return; + dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); clear_bit(req->tag, dd->port->cmds_to_issue); @@ -2749,6 +2752,9 @@ static void mtip_queue_cmd(struct request *req, void *data, { struct driver_data *dd = data; + if (!blk_mq_request_started(req)) + return; + set_bit(req->tag, dd->port->cmds_to_issue); blk_abort_request(req); } @@ -2814,6 +2820,8 @@ static int mtip_service_thread(void *data) dev_warn(&dd->pdev->dev, "Completion workers still active!"); + blk_mq_quiesce_queue(dd->queue); + spin_lock(dd->queue->queue_lock); blk_mq_tagset_busy_iter(&dd->tags, mtip_queue_cmd, dd); @@ -2826,6 +2834,8 @@ static int mtip_service_thread(void *data) mtip_abort_cmd, dd); clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags); + + blk_mq_unquiesce_queue(dd->queue); } if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { @@ -3995,8 +4005,9 @@ static int mtip_block_remove(struct driver_data *dd) dd->disk->disk_name); blk_freeze_queue_start(dd->queue); - blk_mq_stop_hw_queues(dd->queue); + blk_mq_quiesce_queue(dd->queue); blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd); + blk_mq_unquiesce_queue(dd->queue); /* * Delete our gendisk structure. This also removes the device diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 977ec960dd2f..dea7d85134ee 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -661,9 +661,9 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved) static void nbd_clear_que(struct nbd_device *nbd) { - blk_mq_stop_hw_queues(nbd->disk->queue); + blk_mq_quiesce_queue(nbd->disk->queue); blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL); - blk_mq_start_hw_queues(nbd->disk->queue); + blk_mq_unquiesce_queue(nbd->disk->queue); dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n"); } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 0297ad7c1452..4e02aa5fdac0 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -840,7 +840,7 @@ static int virtblk_freeze(struct virtio_device *vdev) /* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work); - blk_mq_stop_hw_queues(vblk->disk->queue); + blk_mq_quiesce_queue(vblk->disk->queue); vdev->config->del_vqs(vdev); return 0; @@ -857,7 +857,7 @@ static int virtblk_restore(struct virtio_device *vdev) virtio_device_ready(vdev); - blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); + blk_mq_unquiesce_queue(vblk->disk->queue); return 0; } #endif diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d70df1d0072d..cb96f4a7ae3a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -131,7 +131,7 @@ void nvme_complete_rq(struct request *req) { if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) { nvme_req(req)->retries++; - blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q)); + blk_mq_requeue_request(req, true); return; } @@ -2591,12 +2591,29 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl) spin_unlock(&dev_list_lock); } -void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) +void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { + nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); flush_work(&ctrl->scan_work); - nvme_remove_namespaces(ctrl); +} +EXPORT_SYMBOL_GPL(nvme_stop_ctrl); +void nvme_start_ctrl(struct nvme_ctrl *ctrl) +{ + if (ctrl->kato) + nvme_start_keep_alive(ctrl); + + if (ctrl->queue_count > 1) { + nvme_queue_scan(ctrl); + nvme_queue_async_events(ctrl); + nvme_start_queues(ctrl); + } +} +EXPORT_SYMBOL_GPL(nvme_start_ctrl); + +void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) +{ device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); spin_lock(&dev_list_lock); @@ -2694,9 +2711,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ctrl->admin_q); - /* Forcibly start all queues to avoid having stuck requests */ - blk_mq_start_hw_queues(ctrl->admin_q); - list_for_each_entry(ns, &ctrl->namespaces, list) { /* * Revalidating a dead namespace sets capacity to 0. This will @@ -2709,16 +2723,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ns->queue); - - /* - * Forcibly start all queues to avoid having stuck requests. - * Note that we must ensure the queues are not stopped - * when the final removal happens. - */ - blk_mq_start_hw_queues(ns->queue); - - /* draining requests in requeue list */ - blk_mq_kick_requeue_list(ns->queue); } mutex_unlock(&ctrl->namespaces_mutex); } @@ -2787,10 +2791,8 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) struct nvme_ns *ns; mutex_lock(&ctrl->namespaces_mutex); - list_for_each_entry(ns, &ctrl->namespaces, list) { + list_for_each_entry(ns, &ctrl->namespaces, list) blk_mq_unquiesce_queue(ns->queue); - blk_mq_kick_requeue_list(ns->queue); - } mutex_unlock(&ctrl->namespaces_mutex); } EXPORT_SYMBOL_GPL(nvme_start_queues); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index ed87214fdc0e..d666ada39a9b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -148,13 +148,10 @@ struct nvme_fc_ctrl { struct device *dev; struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; - u32 queue_count; u32 cnum; u64 association_id; - u64 cap; - struct list_head ctrl_list; /* rport->ctrl_list */ struct blk_mq_tag_set admin_tag_set; @@ -1614,7 +1611,7 @@ nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) nvme_fc_free_queue(&ctrl->queues[i]); } @@ -1635,10 +1632,10 @@ __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl, static void nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl) { - struct nvme_fc_queue *queue = &ctrl->queues[ctrl->queue_count - 1]; + struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1]; int i; - for (i = ctrl->queue_count - 1; i >= 1; i--, queue--) + for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--) __nvme_fc_delete_hw_queue(ctrl, queue, i); } @@ -1648,7 +1645,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) struct nvme_fc_queue *queue = &ctrl->queues[1]; int i, ret; - for (i = 1; i < ctrl->queue_count; i++, queue++) { + for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) { ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); if (ret) goto delete_queues; @@ -1667,7 +1664,7 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) { int i, ret = 0; - for (i = 1; i < ctrl->queue_count; i++) { + for (i = 1; i < ctrl->ctrl.queue_count; i++) { ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize, (qsize / 5)); if (ret) @@ -1685,7 +1682,7 @@ nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize); } @@ -1706,6 +1703,7 @@ nvme_fc_ctrl_free(struct kref *ref) list_del(&ctrl->ctrl_list); spin_unlock_irqrestore(&ctrl->rport->lock, flags); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); @@ -1969,10 +1967,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (ret != -EBUSY) return BLK_STS_IOERR; - if (op->rq) { - blk_mq_stop_hw_queues(op->rq->q); - blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY); - } + if (op->rq) + blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY); + return BLK_STS_RESOURCE; } @@ -2178,17 +2175,20 @@ static int nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + ctrl->lport->ops->max_hw_queues); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { dev_info(ctrl->ctrl.device, "set_queue_count failed: %d\n", ret); return ret; } - ctrl->queue_count = opts->nr_io_queues + 1; - if (!opts->nr_io_queues) + ctrl->ctrl.queue_count = nr_io_queues + 1; + if (!nr_io_queues) return 0; nvme_fc_init_io_queues(ctrl); @@ -2204,7 +2204,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) sizeof(struct scatterlist)) + ctrl->lport->ops->fcprqst_priv_sz; ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; ret = blk_mq_alloc_tag_set(&ctrl->tag_set); @@ -2232,7 +2232,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) out_delete_hw_queues: nvme_fc_delete_hw_io_queues(ctrl); out_cleanup_blk_queue: - nvme_stop_keep_alive(&ctrl->ctrl); blk_cleanup_queue(ctrl->ctrl.connect_q); out_free_tag_set: blk_mq_free_tag_set(&ctrl->tag_set); @@ -2248,17 +2247,21 @@ static int nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + ctrl->lport->ops->max_hw_queues); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { dev_info(ctrl->ctrl.device, "set_queue_count failed: %d\n", ret); return ret; } + ctrl->ctrl.queue_count = nr_io_queues + 1; /* check for io queues existing */ - if (ctrl->queue_count == 1) + if (ctrl->ctrl.queue_count == 1) return 0; nvme_fc_init_io_queues(ctrl); @@ -2275,6 +2278,8 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) if (ret) goto out_delete_hw_queues; + blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); + return 0; out_delete_hw_queues: @@ -2316,7 +2321,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) goto out_delete_hw_queue; if (ctrl->ctrl.state != NVME_CTRL_NEW) - blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); ret = nvmf_connect_admin_queue(&ctrl->ctrl); if (ret) @@ -2329,7 +2334,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) * prior connection values */ - ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); if (ret) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); @@ -2337,9 +2342,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize); - ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (ret) goto out_disconnect_admin_queue; @@ -2360,8 +2365,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) goto out_disconnect_admin_queue; } - nvme_start_keep_alive(&ctrl->ctrl); - /* FC-NVME supports normal SGL Data Block Descriptors */ if (opts->queue_size > ctrl->ctrl.maxcmd) { @@ -2381,7 +2384,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) * Create the io queues */ - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { if (ctrl->ctrl.state == NVME_CTRL_NEW) ret = nvme_fc_create_io_queues(ctrl); else @@ -2395,17 +2398,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ctrl->ctrl.nr_reconnects = 0; - if (ctrl->queue_count > 1) { - nvme_start_queues(&ctrl->ctrl); - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - } + nvme_start_ctrl(&ctrl->ctrl); return 0; /* Success */ out_term_aen_ops: nvme_fc_term_aen_ops(ctrl); - nvme_stop_keep_alive(&ctrl->ctrl); out_disconnect_admin_queue: /* send a Disconnect(association) LS to fc-nvme target */ nvme_fc_xmt_disconnect_assoc(ctrl); @@ -2428,8 +2426,6 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { unsigned long flags; - nvme_stop_keep_alive(&ctrl->ctrl); - spin_lock_irqsave(&ctrl->lock, flags); ctrl->flags |= FCCTRL_TERMIO; ctrl->iocnt = 0; @@ -2447,7 +2443,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) * io requests back to the block layer as part of normal completions * (but with error status). */ - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); @@ -2470,7 +2466,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) * use blk_mq_tagset_busy_itr() and the transport routine to * terminate the exchanges. */ - blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); @@ -2511,7 +2507,8 @@ nvme_fc_delete_ctrl_work(struct work_struct *work) cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); - + nvme_stop_ctrl(&ctrl->ctrl); + nvme_remove_namespaces(&ctrl->ctrl); /* * kill the association on the link side. this will block * waiting for io to terminate @@ -2606,6 +2603,7 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); int ret; + nvme_stop_ctrl(&ctrl->ctrl); /* will block will waiting for io to terminate */ nvme_fc_delete_association(ctrl); @@ -2702,18 +2700,17 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, spin_lock_init(&ctrl->lock); /* io queue count */ - ctrl->queue_count = min_t(unsigned int, + ctrl->ctrl.queue_count = min_t(unsigned int, opts->nr_io_queues, lport->ops->max_hw_queues); - opts->nr_io_queues = ctrl->queue_count; /* so opts has valid value */ - ctrl->queue_count++; /* +1 for admin queue */ + ctrl->ctrl.queue_count++; /* +1 for admin queue */ ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ret = -ENOMEM; - ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue), - GFP_KERNEL); + ctrl->queues = kcalloc(ctrl->ctrl.queue_count, + sizeof(struct nvme_fc_queue), GFP_KERNEL); if (!ctrl->queues) goto out_free_ida; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index d70ff0fdd36b..8f2a168ddc01 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -142,7 +142,9 @@ struct nvme_ctrl { u16 cntlid; u32 ctrl_config; + u32 queue_count; + u64 cap; u32 page_size; u32 max_hw_sectors; u16 oncs; @@ -278,6 +280,8 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, const struct nvme_ctrl_ops *ops, unsigned long quirks); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); +void nvme_start_ctrl(struct nvme_ctrl *ctrl); +void nvme_stop_ctrl(struct nvme_ctrl *ctrl); void nvme_put_ctrl(struct nvme_ctrl *ctrl); int nvme_init_identify(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 33c3b9db7d36..7639ab67c7a2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -35,7 +35,6 @@ #include "nvme.h" -#define NVME_Q_DEPTH 1024 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) @@ -57,6 +56,16 @@ module_param(max_host_mem_size_mb, uint, 0444); MODULE_PARM_DESC(max_host_mem_size_mb, "Maximum Host Memory Buffer (HMB) size per controller (in MiB)"); +static int io_queue_depth_set(const char *val, const struct kernel_param *kp); +static const struct kernel_param_ops io_queue_depth_ops = { + .set = io_queue_depth_set, + .get = param_get_int, +}; + +static int io_queue_depth = 1024; +module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); +MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); + struct nvme_dev; struct nvme_queue; @@ -74,7 +83,6 @@ struct nvme_dev { struct device *dev; struct dma_pool *prp_page_pool; struct dma_pool *prp_small_pool; - unsigned queue_count; unsigned online_queues; unsigned max_qid; int q_depth; @@ -105,6 +113,17 @@ struct nvme_dev { void **host_mem_desc_bufs; }; +static int io_queue_depth_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (ret != 0 || n < 2) + return -EINVAL; + + return param_set_int(val, kp); +} + static inline unsigned int sq_idx(unsigned int qid, u32 stride) { return qid * 2 * stride; @@ -1099,9 +1118,9 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) { int i; - for (i = dev->queue_count - 1; i >= lowest; i--) { + for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) { struct nvme_queue *nvmeq = dev->queues[i]; - dev->queue_count--; + dev->ctrl.queue_count--; dev->queues[i] = NULL; nvme_free_queue(nvmeq); } @@ -1126,7 +1145,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) spin_unlock_irq(&nvmeq->q_lock); if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) - blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q); + blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq); @@ -1145,8 +1164,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) if (shutdown) nvme_shutdown_ctrl(&dev->ctrl); else - nvme_disable_ctrl(&dev->ctrl, lo_hi_readq( - dev->bar + NVME_REG_CAP)); + nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); spin_lock_irq(&nvmeq->q_lock); nvme_process_cq(nvmeq); @@ -1221,7 +1239,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->qid = qid; nvmeq->cq_vector = -1; dev->queues[qid] = nvmeq; - dev->queue_count++; + dev->ctrl.queue_count++; return nvmeq; @@ -1317,7 +1335,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) * user requests may be waiting on a stopped queue. Start the * queue to flush these to completion. */ - blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true); + blk_mq_unquiesce_queue(dev->ctrl.admin_q); blk_cleanup_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); } @@ -1354,7 +1372,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) return -ENODEV; } } else - blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true); + blk_mq_unquiesce_queue(dev->ctrl.admin_q); return 0; } @@ -1385,11 +1403,10 @@ static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size) return 0; } -static int nvme_configure_admin_queue(struct nvme_dev *dev) +static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) { int result; u32 aqa; - u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP); struct nvme_queue *nvmeq; result = nvme_remap_bar(dev, db_bar_size(dev, 0)); @@ -1397,13 +1414,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) return result; dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ? - NVME_CAP_NSSRC(cap) : 0; + NVME_CAP_NSSRC(dev->ctrl.cap) : 0; if (dev->subsystem && (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO)) writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS); - result = nvme_disable_ctrl(&dev->ctrl, cap); + result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); if (result < 0) return result; @@ -1422,7 +1439,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ); lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ); - result = nvme_enable_ctrl(&dev->ctrl, cap); + result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap); if (result) return result; @@ -1441,7 +1458,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev) unsigned i, max; int ret = 0; - for (i = dev->queue_count; i <= dev->max_qid; i++) { + for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { /* vector == qid - 1, match nvme_create_queue */ if (!nvme_alloc_queue(dev, i, dev->q_depth, pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) { @@ -1450,7 +1467,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev) } } - max = min(dev->max_qid, dev->queue_count - 1); + max = min(dev->max_qid, dev->ctrl.queue_count - 1); for (i = dev->online_queues; i <= max; i++) { ret = nvme_create_queue(dev->queues[i], i); if (ret) @@ -1585,9 +1602,10 @@ static void nvme_free_host_mem(struct nvme_dev *dev) static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) { struct nvme_host_mem_buf_desc *descs; - u32 chunk_size, max_entries, i = 0; + u32 chunk_size, max_entries; + int i = 0; void **bufs; - u64 size, tmp; + u64 size = 0, tmp; /* start big and work our way down */ chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER); @@ -1866,7 +1884,6 @@ static int nvme_dev_add(struct nvme_dev *dev) static int nvme_pci_enable(struct nvme_dev *dev) { - u64 cap; int result = -ENOMEM; struct pci_dev *pdev = to_pci_dev(dev->dev); @@ -1893,10 +1910,11 @@ static int nvme_pci_enable(struct nvme_dev *dev) if (result < 0) return result; - cap = lo_hi_readq(dev->bar + NVME_REG_CAP); + dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP); - dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); - dev->db_stride = 1 << NVME_CAP_STRIDE(cap); + dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1, + io_queue_depth); + dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap); dev->dbs = dev->bar + 4096; /* @@ -1908,6 +1926,12 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev_warn(dev->ctrl.device, "detected Apple NVMe controller, " "set queue depth=%u to work around controller resets\n", dev->q_depth); + } else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG && + (pdev->device == 0xa821 || pdev->device == 0xa822) && + NVME_CAP_MQES(dev->ctrl.cap) == 0) { + dev->q_depth = 64; + dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, " + "set queue depth=%u\n", dev->q_depth); } /* @@ -1996,7 +2020,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); queues = dev->online_queues - 1; - for (i = dev->queue_count - 1; i > 0; i--) + for (i = dev->ctrl.queue_count - 1; i > 0; i--) nvme_suspend_queue(dev->queues[i]); if (dead) { @@ -2004,7 +2028,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) * probe, before the admin queue is configured. Thus, * queue_count can be 0 here. */ - if (dev->queue_count) + if (dev->ctrl.queue_count) nvme_suspend_queue(dev->queues[0]); } else { nvme_disable_io_queues(dev, queues); @@ -2094,7 +2118,7 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; - result = nvme_configure_admin_queue(dev); + result = nvme_pci_configure_admin_queue(dev); if (result) goto out; @@ -2132,15 +2156,6 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; - /* - * A controller that can not execute IO typically requires user - * intervention to correct. For such degraded controllers, the driver - * should not submit commands the user did not request, so skip - * registering for asynchronous event notification on this condition. - */ - if (dev->online_queues > 1) - nvme_queue_async_events(&dev->ctrl); - /* * Keep the controller around but remove all namespaces if we don't have * any working I/O queue. @@ -2161,8 +2176,7 @@ static void nvme_reset_work(struct work_struct *work) goto out; } - if (dev->online_queues > 1) - nvme_queue_scan(&dev->ctrl); + nvme_start_ctrl(&dev->ctrl); return; out: @@ -2339,11 +2353,13 @@ static void nvme_remove(struct pci_dev *pdev) } flush_work(&dev->ctrl.reset_work); - nvme_uninit_ctrl(&dev->ctrl); + nvme_stop_ctrl(&dev->ctrl); + nvme_remove_namespaces(&dev->ctrl); nvme_dev_disable(dev, true); nvme_free_host_mem(dev); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); + nvme_uninit_ctrl(&dev->ctrl); nvme_release_prp_pools(dev); nvme_dev_unmap(dev); nvme_put_ctrl(&dev->ctrl); @@ -2455,6 +2471,10 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6d4119dfbdaa..da04df1af231 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -86,7 +86,7 @@ enum nvme_rdma_queue_flags { struct nvme_rdma_queue { struct nvme_rdma_qe *rsp_ring; - u8 sig_count; + atomic_t sig_count; int queue_size; size_t cmnd_capsule_len; struct nvme_rdma_ctrl *ctrl; @@ -103,7 +103,6 @@ struct nvme_rdma_queue { struct nvme_rdma_ctrl { /* read only in the hot path */ struct nvme_rdma_queue *queues; - u32 queue_count; /* other member variables */ struct blk_mq_tag_set tag_set; @@ -119,7 +118,6 @@ struct nvme_rdma_ctrl { struct blk_mq_tag_set admin_tag_set; struct nvme_rdma_device *device; - u64 cap; u32 max_fr_pages; struct sockaddr_storage addr; @@ -274,9 +272,6 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); int ret = 0; - if (!req->mr->need_inval) - goto out; - ib_dereg_mr(req->mr); req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, @@ -349,7 +344,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, struct nvme_rdma_ctrl *ctrl = data; struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1]; - BUG_ON(hctx_idx >= ctrl->queue_count); + BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); hctx->driver_data = queue; return 0; @@ -525,6 +520,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, queue->cmnd_capsule_len = sizeof(struct nvme_command); queue->queue_size = queue_size; + atomic_set(&queue->sig_count, 0); queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue, RDMA_PS_TCP, IB_QPT_RC); @@ -587,7 +583,7 @@ static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); } @@ -595,7 +591,7 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) { int i, ret = 0; - for (i = 1; i < ctrl->queue_count; i++) { + for (i = 1; i < ctrl->ctrl.queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) { dev_info(ctrl->ctrl.device, @@ -623,14 +619,14 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) if (ret) return ret; - ctrl->queue_count = nr_io_queues + 1; - if (ctrl->queue_count < 2) + ctrl->ctrl.queue_count = nr_io_queues + 1; + if (ctrl->ctrl.queue_count < 2) return 0; dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); - for (i = 1; i < ctrl->queue_count; i++) { + for (i = 1; i < ctrl->ctrl.queue_count; i++) { ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.opts->queue_size); if (ret) { @@ -705,7 +701,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ++ctrl->ctrl.nr_reconnects; - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { nvme_rdma_free_io_queues(ctrl); ret = blk_mq_reinit_tagset(&ctrl->tag_set); @@ -729,13 +725,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); - ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (ret) goto requeue; - nvme_start_keep_alive(&ctrl->ctrl); - - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { ret = nvme_rdma_init_io_queues(ctrl); if (ret) goto requeue; @@ -743,16 +737,16 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ret = nvme_rdma_connect_io_queues(ctrl); if (ret) goto requeue; + + blk_mq_update_nr_hw_queues(&ctrl->tag_set, + ctrl->ctrl.queue_count - 1); } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); ctrl->ctrl.nr_reconnects = 0; - if (ctrl->queue_count > 1) { - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - } + nvme_start_ctrl(&ctrl->ctrl); dev_info(ctrl->ctrl.device, "Successfully reconnected\n"); @@ -770,17 +764,17 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) struct nvme_rdma_ctrl, err_work); int i; - nvme_stop_keep_alive(&ctrl->ctrl); + nvme_stop_ctrl(&ctrl->ctrl); - for (i = 0; i < ctrl->queue_count; i++) + for (i = 0; i < ctrl->ctrl.queue_count; i++) clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); - if (ctrl->queue_count > 1) + if (ctrl->ctrl.queue_count > 1) nvme_stop_queues(&ctrl->ctrl); - blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); /* We must take care of fastfail/requeue all our inflight requests */ - if (ctrl->queue_count > 1) + if (ctrl->ctrl.queue_count > 1) blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, @@ -790,7 +784,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) * queues are not a live anymore, so restart the queues to fail fast * new IO */ - blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_start_queues(&ctrl->ctrl); nvme_rdma_reconnect_or_remove(ctrl); @@ -1008,17 +1002,16 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) nvme_rdma_wr_error(cq, wc, "SEND"); } -static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) +/* + * We want to signal completion at least every queue depth/2. This returns the + * largest power of two that is not above half of (queue size + 1) to optimize + * (avoid divisions). + */ +static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) { - int sig_limit; + int limit = 1 << ilog2((queue->queue_size + 1) / 2); - /* - * We signal completion every queue depth/2 and also handle the - * degenerated case of a device with queue_depth=1, where we - * would need to signal every message. - */ - sig_limit = max(queue->queue_size / 2, 1); - return (++queue->sig_count % sig_limit) == 0; + return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0; } static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, @@ -1574,7 +1567,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, + &ctrl->ctrl.cap); if (error) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); @@ -1582,9 +1576,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (error) goto out_cleanup_queue; @@ -1601,8 +1595,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) if (error) goto out_cleanup_queue; - nvme_start_keep_alive(&ctrl->ctrl); - return 0; out_cleanup_queue: @@ -1620,11 +1612,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) { - nvme_stop_keep_alive(&ctrl->ctrl); cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->reconnect_work); - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); @@ -1634,18 +1625,21 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags)) nvme_shutdown_ctrl(&ctrl->ctrl); - blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_rdma_destroy_admin_queue(ctrl); } static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { - nvme_uninit_ctrl(&ctrl->ctrl); + nvme_stop_ctrl(&ctrl->ctrl); + nvme_remove_namespaces(&ctrl->ctrl); if (shutdown) nvme_rdma_shutdown_ctrl(ctrl); + nvme_uninit_ctrl(&ctrl->ctrl); if (ctrl->ctrl.tagset) { blk_cleanup_queue(ctrl->ctrl.connect_q); blk_mq_free_tag_set(&ctrl->tag_set); @@ -1707,6 +1701,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) int ret; bool changed; + nvme_stop_ctrl(&ctrl->ctrl); nvme_rdma_shutdown_ctrl(ctrl); ret = nvme_rdma_configure_admin_queue(ctrl); @@ -1716,7 +1711,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) goto del_dead_ctrl; } - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { ret = blk_mq_reinit_tagset(&ctrl->tag_set); if (ret) goto del_dead_ctrl; @@ -1728,16 +1723,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) ret = nvme_rdma_connect_io_queues(ctrl); if (ret) goto del_dead_ctrl; + + blk_mq_update_nr_hw_queues(&ctrl->tag_set, + ctrl->ctrl.queue_count - 1); } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - if (ctrl->queue_count > 1) { - nvme_start_queues(&ctrl->ctrl); - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - } + nvme_start_ctrl(&ctrl->ctrl); return; @@ -1785,7 +1779,7 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) + SG_CHUNK_SIZE * sizeof(struct scatterlist); ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; ret = blk_mq_alloc_tag_set(&ctrl->tag_set); @@ -1863,12 +1857,12 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work); - ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ + ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ret = -ENOMEM; - ctrl->queues = kcalloc(ctrl->queue_count, sizeof(*ctrl->queues), + ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues), GFP_KERNEL); if (!ctrl->queues) goto out_uninit_ctrl; @@ -1925,15 +1919,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); mutex_unlock(&nvme_rdma_ctrl_mutex); - if (opts->nr_io_queues) { - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - } + nvme_start_ctrl(&ctrl->ctrl); return &ctrl->ctrl; out_remove_admin_queue: - nvme_stop_keep_alive(&ctrl->ctrl); nvme_rdma_destroy_admin_queue(ctrl); out_kfree_queues: kfree(ctrl->queues); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 7692a96c9065..1e6dcc241b3c 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1164,18 +1164,24 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, memset(acc, 0, sizeof(*acc)); - if (iod->rqstdatalen < sizeof(struct fcnvme_ls_cr_assoc_rqst)) + /* + * FC-NVME spec changes. There are initiators sending different + * lengths as padding sizes for Create Association Cmd descriptor + * was incorrect. + * Accept anything of "minimum" length. Assume format per 1.15 + * spec (with HOSTID reduced to 16 bytes), ignore how long the + * trailing pad length is. + */ + if (iod->rqstdatalen < FCNVME_LSDESC_CRA_RQST_MINLEN) ret = VERR_CR_ASSOC_LEN; - else if (rqst->desc_list_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_ls_cr_assoc_rqst))) + else if (rqst->desc_list_len < + cpu_to_be32(FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN)) ret = VERR_CR_ASSOC_RQST_LEN; else if (rqst->assoc_cmd.desc_tag != cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD)) ret = VERR_CR_ASSOC_CMD; - else if (rqst->assoc_cmd.desc_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_cr_assoc_cmd))) + else if (rqst->assoc_cmd.desc_len < + cpu_to_be32(FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN)) ret = VERR_CR_ASSOC_CMD_LEN; else if (!rqst->assoc_cmd.ersp_ratio || (be16_to_cpu(rqst->assoc_cmd.ersp_ratio) >= diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 40128793e613..3b4d47a6abdb 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -85,7 +85,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) bio_set_op_attrs(bio, op, op_flags); bio_chain(bio, prev); - cookie = submit_bio(prev); + submit_bio(prev); } sector += sg->length >> 9; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 5f55c683b338..717ed7ddb2f6 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -44,12 +44,10 @@ struct nvme_loop_iod { struct nvme_loop_ctrl { struct nvme_loop_queue *queues; - u32 queue_count; struct blk_mq_tag_set admin_tag_set; struct list_head list; - u64 cap; struct blk_mq_tag_set tag_set; struct nvme_loop_iod async_event_iod; struct nvme_ctrl ctrl; @@ -241,7 +239,7 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, struct nvme_loop_ctrl *ctrl = data; struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1]; - BUG_ON(hctx_idx >= ctrl->queue_count); + BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); hctx->driver_data = queue; return 0; @@ -307,7 +305,7 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); } @@ -330,7 +328,7 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) if (ret) goto out_destroy_queues; - ctrl->queue_count++; + ctrl->ctrl.queue_count++; } return 0; @@ -344,7 +342,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) { int i, ret; - for (i = 1; i < ctrl->queue_count; i++) { + for (i = 1; i < ctrl->ctrl.queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) return ret; @@ -372,7 +370,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) error = nvmet_sq_init(&ctrl->queues[0].nvme_sq); if (error) return error; - ctrl->queue_count = 1; + ctrl->ctrl.queue_count = 1; error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (error) @@ -388,7 +386,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) if (error) goto out_cleanup_queue; - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); if (error) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); @@ -396,9 +394,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (error) goto out_cleanup_queue; @@ -409,8 +407,6 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) if (error) goto out_cleanup_queue; - nvme_start_keep_alive(&ctrl->ctrl); - return 0; out_cleanup_queue: @@ -424,9 +420,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) { - nvme_stop_keep_alive(&ctrl->ctrl); - - if (ctrl->queue_count > 1) { + if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); @@ -436,9 +430,10 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) if (ctrl->ctrl.state == NVME_CTRL_LIVE) nvme_shutdown_ctrl(&ctrl->ctrl); - blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_loop_destroy_admin_queue(ctrl); } @@ -447,8 +442,10 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work) struct nvme_loop_ctrl *ctrl = container_of(work, struct nvme_loop_ctrl, delete_work); - nvme_uninit_ctrl(&ctrl->ctrl); + nvme_stop_ctrl(&ctrl->ctrl); + nvme_remove_namespaces(&ctrl->ctrl); nvme_loop_shutdown_ctrl(ctrl); + nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); } @@ -496,6 +493,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) bool changed; int ret; + nvme_stop_ctrl(&ctrl->ctrl); nvme_loop_shutdown_ctrl(ctrl); ret = nvme_loop_configure_admin_queue(ctrl); @@ -510,13 +508,13 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) if (ret) goto out_destroy_io; + blk_mq_update_nr_hw_queues(&ctrl->tag_set, + ctrl->ctrl.queue_count - 1); + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - - nvme_start_queues(&ctrl->ctrl); + nvme_start_ctrl(&ctrl->ctrl); return; @@ -559,7 +557,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) + SG_CHUNK_SIZE * sizeof(struct scatterlist); ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; ctrl->ctrl.tagset = &ctrl->tag_set; @@ -651,10 +649,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, list_add_tail(&ctrl->list, &nvme_loop_ctrl_list); mutex_unlock(&nvme_loop_ctrl_mutex); - if (opts->nr_io_queues) { - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); - } + nvme_start_ctrl(&ctrl->ctrl); return &ctrl->ctrl; diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index bc711a10be05..21c37e39e41a 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -17,6 +17,7 @@ /* * This file contains definitions relative to FC-NVME r1.14 (16-020vB). + * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content. */ #ifndef _NVME_FC_H @@ -193,9 +194,21 @@ struct fcnvme_lsdesc_cr_assoc_cmd { uuid_t hostid; u8 hostnqn[FCNVME_ASSOC_HOSTNQN_LEN]; u8 subnqn[FCNVME_ASSOC_SUBNQN_LEN]; - u8 rsvd632[384]; + __be32 rsvd584[108]; /* pad to 1016 bytes, + * which makes overall LS rqst + * payload 1024 bytes + */ }; +#define FCNVME_LSDESC_CRA_CMD_DESC_MINLEN \ + offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, rsvd584) + +#define FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN \ + (FCNVME_LSDESC_CRA_CMD_DESC_MINLEN - \ + offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, ersp_ratio)) + + + /* FCNVME_LSDESC_CREATE_CONN_CMD */ struct fcnvme_lsdesc_cr_conn_cmd { __be32 desc_tag; /* FCNVME_LSDESC_xxx */ @@ -273,6 +286,14 @@ struct fcnvme_ls_cr_assoc_rqst { struct fcnvme_lsdesc_cr_assoc_cmd assoc_cmd; }; +#define FCNVME_LSDESC_CRA_RQST_MINLEN \ + (offsetof(struct fcnvme_ls_cr_assoc_rqst, assoc_cmd) + \ + FCNVME_LSDESC_CRA_CMD_DESC_MINLEN) + +#define FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN \ + FCNVME_LSDESC_CRA_CMD_DESC_MINLEN + + struct fcnvme_ls_cr_assoc_acc { struct fcnvme_ls_acc_hdr hdr; struct fcnvme_lsdesc_assoc_id associd;