nvme: Prevent resets during paused controller state

A paused controller is doing critical internal activation work in the
background. Prevent subsequent controller resets from occurring during
this period by setting the controller state to RESETTING first. A helper
function, nvme_try_sched_reset_work(), is introduced for these paths so
they may continue with scheduling the reset_work after they've completed
their uninterruptible critical section.

Tested-by: Edmund Nadolski <edmund.nadolski@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
This commit is contained in:
Keith Busch 2019-09-06 11:23:08 -06:00
parent 92b98e88d5
commit 4c75f87785

View file

@ -120,6 +120,21 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
queue_work(nvme_wq, &ctrl->scan_work);
}
/*
* Use this function to proceed with scheduling reset_work for a controller
* that had previously been set to the resetting state. This is intended for
* code paths that can't be interrupted by other reset attempts. A hot removal
* may prevent this from succeeding.
*/
static int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
{
if (ctrl->state != NVME_CTRL_RESETTING)
return -EBUSY;
if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
return -EBUSY;
return 0;
}
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@ -3828,13 +3843,13 @@ static void nvme_fw_act_work(struct work_struct *work)
if (time_after(jiffies, fw_act_timeout)) {
dev_warn(ctrl->device,
"Fw activation timeout, reset controller\n");
nvme_reset_ctrl(ctrl);
break;
nvme_try_sched_reset(ctrl);
return;
}
msleep(100);
}
if (ctrl->state != NVME_CTRL_LIVE)
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
return;
nvme_start_queues(ctrl);
@ -3854,7 +3869,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
queue_work(nvme_wq, &ctrl->fw_act_work);
/*
* We are (ab)using the RESETTING state to prevent subsequent
* recovery actions from interfering with the controller's
* firmware activation.
*/
if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
queue_work(nvme_wq, &ctrl->fw_act_work);
break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA: