nvme: if_ready checks to fail io to deleting controller

The revised if_ready checks skipped over the case of returning error when
the controller is being deleted.  Instead it was returning BUSY, which
caused the ios to retry, which caused the ns delete to hang waiting for
the ios to drain.

Stack trace of hang looks like:
 kworker/u64:2   D    0    74      2 0x80000000
 Workqueue: nvme-delete-wq nvme_delete_ctrl_work [nvme_core]
 Call Trace:
  ? __schedule+0x26d/0x820
  schedule+0x32/0x80
  blk_mq_freeze_queue_wait+0x36/0x80
  ? remove_wait_queue+0x60/0x60
  blk_cleanup_queue+0x72/0x160
  nvme_ns_remove+0x106/0x140 [nvme_core]
  nvme_remove_namespaces+0x7e/0xa0 [nvme_core]
  nvme_delete_ctrl_work+0x4d/0x80 [nvme_core]
  process_one_work+0x160/0x350
  worker_thread+0x1c3/0x3d0
  kthread+0xf5/0x130
  ? process_one_work+0x350/0x350
  ? kthread_bind+0x10/0x10
  ret_from_fork+0x1f/0x30

Extend nvmf_fail_nonready_command() to supply the controller pointer so
that the controller state can be looked at. Fail any io to a controller
that is deleting.

Fixes: 3bc32bb118 ("nvme-fabrics: refactor queue ready check")
Fixes: 35897b920c ("nvme-fabrics: fix and refine state checks in __nvmf_check_ready")
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
This commit is contained in:
James Smart 2018-07-20 15:49:48 -07:00 committed by Christoph Hellwig
parent d082dc1562
commit 6cdefc6e2a
5 changed files with 12 additions and 7 deletions

View file

@ -539,14 +539,18 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
/*
* For something we're not in a state to send to the device the default action
* is to busy it and retry it after the controller state is recovered. However,
* anything marked for failfast or nvme multipath is immediately failed.
* if the controller is deleting or if anything is marked for failfast or
* nvme multipath it is immediately failed.
*
* Note: commands used to initialize the controller will be marked for failfast.
* Note: nvme cli/ioctl commands are marked for failfast.
*/
blk_status_t nvmf_fail_nonready_command(struct request *rq)
blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
struct request *rq)
{
if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
if (ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DEAD &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;

View file

@ -162,7 +162,8 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
blk_status_t nvmf_fail_nonready_command(struct request *rq);
blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
struct request *rq);
bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live);

View file

@ -2272,7 +2272,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
return nvmf_fail_nonready_command(rq);
return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
ret = nvme_setup_cmd(ns, rq, sqe);
if (ret)

View file

@ -1639,7 +1639,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
WARN_ON_ONCE(rq->tag < 0);
if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
return nvmf_fail_nonready_command(rq);
return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
dev = queue->device->dev;
ib_dma_sync_single_for_cpu(dev, sqe->dma,

View file

@ -162,7 +162,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_status_t ret;
if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
return nvmf_fail_nonready_command(req);
return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req);
ret = nvme_setup_cmd(ns, req, &iod->cmd);
if (ret)