From daf7795406bf307997366f694888bd317ae5b5fa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 Dec 2023 14:52:14 -0800 Subject: [PATCH 01/11] scsi: ufs: core: Simplify power management during async scan ufshcd_init() calls pm_runtime_get_sync() before it calls async_schedule(). ufshcd_async_scan() calls pm_runtime_put_sync() directly or indirectly from ufshcd_add_lus(). Simplify ufshcd_async_scan() by always calling pm_runtime_put_sync() from ufshcd_async_scan(). Cc: Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20231218225229.2542156-2-bvanassche@acm.org Reviewed-by: Can Guo Reviewed-by: Manivannan Sadhasivam Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index d6ae5d17892c..0ad8bde39cd1 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8711,7 +8711,6 @@ static int ufshcd_add_lus(struct ufs_hba *hba) ufs_bsg_probe(hba); scsi_scan_host(hba->host); - pm_runtime_put_sync(hba->dev); out: return ret; @@ -8980,15 +8979,15 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie) /* Probe and add UFS logical units */ ret = ufshcd_add_lus(hba); + out: + pm_runtime_put_sync(hba->dev); /* * If we failed to initialize the device or the device is not * present, turn off the power/clocks etc. */ - if (ret) { - pm_runtime_put_sync(hba->dev); + if (ret) ufshcd_hba_exit(hba); - } } static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) From ee36710912b2075c417100a8acc642c9c6496501 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 Dec 2023 14:52:15 -0800 Subject: [PATCH 02/11] scsi: ufs: core: Remove the ufshcd_hba_exit() call from ufshcd_async_scan() Calling ufshcd_hba_exit() from a function that is called asynchronously from ufshcd_init() is wrong because this triggers multiple race conditions. Instead of calling ufshcd_hba_exit(), log an error message. Reported-by: Daniel Mentz Fixes: 1d337ec2f35e ("ufs: improve init sequence") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20231218225229.2542156-3-bvanassche@acm.org Reviewed-by: Can Guo Reviewed-by: Manivannan Sadhasivam Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0ad8bde39cd1..7c59d7a02243 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8982,12 +8982,9 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie) out: pm_runtime_put_sync(hba->dev); - /* - * If we failed to initialize the device or the device is not - * present, turn off the power/clocks etc. - */ + if (ret) - ufshcd_hba_exit(hba); + dev_err(hba->dev, "%s failed: %d\n", __func__, ret); } static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) From b08d86e6eb03566c5dc32e6ff10147f80aeb7511 Mon Sep 17 00:00:00 2001 From: ChanWoo Lee Date: Tue, 19 Dec 2023 17:27:40 +0900 Subject: [PATCH 03/11] scsi: ufs: qcom: Remove unnecessary goto statement from ufs_qcom_config_esi() There is only one place where goto is used, and it is unnecessary to check the ret value through 'goto out' because the ret value is already true. Therefore, remove the goto statement and integrate the '!ret' condition into the existing code. Signed-off-by: ChanWoo Lee Link: https://lore.kernel.org/r/20231219082740.27644-1-cw9316.lee@samsung.com Reviewed-by: Christophe JAILLET Reviewed-by: Manivannan Sadhasivam Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-qcom.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 9fd8d737edea..0879f5ed4e5b 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1714,7 +1714,7 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) ufs_qcom_write_msi_msg); if (ret) { dev_err(hba->dev, "Failed to request Platform MSI %d\n", ret); - goto out; + return ret; } msi_lock_descs(hba->dev); @@ -1748,11 +1748,8 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) FIELD_PREP(ESI_VEC_MASK, MAX_ESI_VEC - 1), REG_UFS_CFG3); ufshcd_mcq_enable_esi(hba); - } - -out: - if (!ret) host->esi_enabled = true; + } return ret; } From c6d5aa44eaf6d119f9ceb3bfc7d22405ac04232a Mon Sep 17 00:00:00 2001 From: David Strahan Date: Tue, 19 Dec 2023 13:36:50 -0600 Subject: [PATCH 04/11] scsi: smartpqi: Add new controller PCI IDs All PCI ID entries in Hex. Add PCI IDs for Cisco controllers: VID / DID / SVID / SDID ---- ---- ---- ---- Cisco 24G TriMode M1 RAID 4GB FBWC 32D 9005 / 028f / 1137 / 02f8 Cisco 24G TriMode M1 RAID 4GB FBWC 16D 9005 / 028f / 1137 / 02f9 Cisco 24G TriMode M1 HBA 16D 9005 / 028f / 1137 / 02fa Add PCI IDs for CloudNine controllers: VID / DID / SVID / SDID ---- ---- ---- ---- SmartRAID P7604N-16i 9005 / 028f / 1f51 / 100e SmartRAID P7604N-8i 9005 / 028f / 1f51 / 100f SmartRAID P7504N-16i 9005 / 028f / 1f51 / 1010 SmartRAID P7504N-8i 9005 / 028f / 1f51 / 1011 SmartRAID P7504N-8i 9005 / 028f / 1f51 / 1043 SmartHBA P6500-8i 9005 / 028f / 1f51 / 1044 SmartRAID P7504-8i 9005 / 028f / 1f51 / 1045 Reviewed-by: Murthy Bhat Reviewed-by: Mahesh Rajashekhara Reviewed-by: Scott Teel Reviewed-by: Scott Benesh Reviewed-by: Mike McGowen Reviewed-by: Kevin Barnett Signed-off-by: David Strahan Signed-off-by: Don Brace Link: https://lore.kernel.org/r/20231219193653.277553-2-don.brace@microchip.com Signed-off-by: Martin K. Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 9a58df9312fa..d56201120087 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -10142,6 +10142,18 @@ static const struct pci_device_id pqi_pci_id_table[] = { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x1014, 0x0718) }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1137, 0x02f8) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1137, 0x02f9) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1137, 0x02fa) + }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x1e93, 0x1000) @@ -10198,6 +10210,34 @@ static const struct pci_device_id pqi_pci_id_table[] = { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x1f51, 0x100a) }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x100e) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x100f) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1010) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1011) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1043) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1044) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1045) + }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, PCI_ANY_ID, PCI_ANY_ID) From fb4cece17b4583f55b34a8538e27a4adc833c9d4 Mon Sep 17 00:00:00 2001 From: Mahesh Rajashekhara Date: Tue, 19 Dec 2023 13:36:51 -0600 Subject: [PATCH 05/11] scsi: smartpqi: Fix logical volume rescan race condition Correct rescan flag race condition. Multiple conditions are being evaluated before notifying OS to do a rescan. Driver will skip rescanning the device if any one of the following conditions are met: - Devices that have not yet been added to the OS or devices that have been removed. - Devices which are already marked for removal or in the phase of removal. Under very rare conditions, after logical volume size expansion, the OS still sees the size of the logical volume which was before expansion. The rescan flag in the driver is used to signal the need for a logical volume rescan. A race condition can occur in the driver, and it leads to one thread overwriting the flag inadvertently. As a result, driver is not notifying the OS SML to rescan the logical volume. Move device->rescan update into new function pqi_mark_volumes_for_rescan() and protect with a spin lock. Move check for device->rescan into new function pqi_volume_rescan_needed() and protect function call with a spin_lock. Reviewed-by: Scott Teel Reviewed-by: Scott Benesh Reviewed-by: Mike McGowen Reviewed-by: Kevin Barnett Co-developed-by: Murthy Bhat Signed-off-by: Murthy Bhat Signed-off-by: Mahesh Rajashekhara Signed-off-by: Don Brace Link: https://lore.kernel.org/r/20231219193653.277553-3-don.brace@microchip.com Signed-off-by: Martin K. Petersen --- drivers/scsi/smartpqi/smartpqi.h | 1 - drivers/scsi/smartpqi/smartpqi_init.c | 43 ++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h index 041940183516..cdedc271857a 100644 --- a/drivers/scsi/smartpqi/smartpqi.h +++ b/drivers/scsi/smartpqi/smartpqi.h @@ -1347,7 +1347,6 @@ struct pqi_ctrl_info { bool controller_online; bool block_requests; bool scan_blocked; - u8 logical_volume_rescan_needed : 1; u8 inbound_spanning_supported : 1; u8 outbound_spanning_supported : 1; u8 pqi_mode_enabled : 1; diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index d56201120087..081bb2c09806 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -2093,8 +2093,6 @@ static void pqi_scsi_update_device(struct pqi_ctrl_info *ctrl_info, if (existing_device->devtype == TYPE_DISK) { existing_device->raid_level = new_device->raid_level; existing_device->volume_status = new_device->volume_status; - if (ctrl_info->logical_volume_rescan_needed) - existing_device->rescan = true; memset(existing_device->next_bypass_group, 0, sizeof(existing_device->next_bypass_group)); if (!pqi_raid_maps_equal(existing_device->raid_map, new_device->raid_map)) { kfree(existing_device->raid_map); @@ -2164,6 +2162,20 @@ static inline void pqi_init_device_tmf_work(struct pqi_scsi_dev *device) INIT_WORK(&tmf_work->work_struct, pqi_tmf_worker); } +static inline bool pqi_volume_rescan_needed(struct pqi_scsi_dev *device) +{ + if (pqi_device_in_remove(device)) + return false; + + if (device->sdev == NULL) + return false; + + if (!scsi_device_online(device->sdev)) + return false; + + return device->rescan; +} + static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *new_device_list[], unsigned int num_new_devices) { @@ -2284,9 +2296,13 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, if (device->sdev && device->queue_depth != device->advertised_queue_depth) { device->advertised_queue_depth = device->queue_depth; scsi_change_queue_depth(device->sdev, device->advertised_queue_depth); - if (device->rescan) { - scsi_rescan_device(device->sdev); + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + if (pqi_volume_rescan_needed(device)) { device->rescan = false; + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); + scsi_rescan_device(device->sdev); + } else { + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); } } } @@ -2308,8 +2324,6 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, } } - ctrl_info->logical_volume_rescan_needed = false; - } static inline bool pqi_is_supported_device(struct pqi_scsi_dev *device) @@ -3702,6 +3716,21 @@ static bool pqi_ofa_process_event(struct pqi_ctrl_info *ctrl_info, return ack_event; } +static void pqi_mark_volumes_for_rescan(struct pqi_ctrl_info *ctrl_info) +{ + unsigned long flags; + struct pqi_scsi_dev *device; + + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + list_for_each_entry(device, &ctrl_info->scsi_device_list, scsi_device_list_entry) { + if (pqi_is_logical_device(device) && device->devtype == TYPE_DISK) + device->rescan = true; + } + + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); +} + static void pqi_disable_raid_bypass(struct pqi_ctrl_info *ctrl_info) { unsigned long flags; @@ -3742,7 +3771,7 @@ static void pqi_event_worker(struct work_struct *work) ack_event = true; rescan_needed = true; if (event->event_type == PQI_EVENT_TYPE_LOGICAL_DEVICE) - ctrl_info->logical_volume_rescan_needed = true; + pqi_mark_volumes_for_rescan(ctrl_info); else if (event->event_type == PQI_EVENT_TYPE_AIO_STATE_CHANGE) pqi_disable_raid_bypass(ctrl_info); } From 8c9955107762a23043db544d83959c4e0103bae3 Mon Sep 17 00:00:00 2001 From: Don Brace Date: Tue, 19 Dec 2023 13:36:52 -0600 Subject: [PATCH 06/11] scsi: smartpqi: Bump driver version to 2.1.26-030 Reviewed-by: Mahesh Rajashekhara Reviewed-by: Murthy Bhat Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Mike McGowen Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Link: https://lore.kernel.org/r/20231219193653.277553-4-don.brace@microchip.com Signed-off-by: Martin K. Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 081bb2c09806..ceff1ec13f9e 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -33,11 +33,11 @@ #define BUILD_TIMESTAMP #endif -#define DRIVER_VERSION "2.1.24-046" +#define DRIVER_VERSION "2.1.26-030" #define DRIVER_MAJOR 2 #define DRIVER_MINOR 1 -#define DRIVER_RELEASE 24 -#define DRIVER_REVISION 46 +#define DRIVER_RELEASE 26 +#define DRIVER_REVISION 30 #define DRIVER_NAME "Microchip SmartPQI Driver (v" \ DRIVER_VERSION BUILD_TIMESTAMP ")" From 904fdd2062f3101fb09db8ee077abf7ffd95e538 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 20 Dec 2023 21:31:13 -0800 Subject: [PATCH 07/11] scsi: mpi3mr: Fix mpi3mr_fw.c kernel-doc warnings Use correct format for function return values. Delete blank lines that are reported as "bad line:". mpi3mr_fw.c:482: warning: No description found for return value of 'mpi3mr_get_reply_desc' mpi3mr_fw.c:1066: warning: bad line: mpi3mr_fw.c:1109: warning: bad line: mpi3mr_fw.c:1249: warning: No description found for return value of 'mpi3mr_revalidate_factsdata' Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20231221053113.32191-1-rdunlap@infradead.org Cc: Sathya Prakash Veerichetty Cc: Kashyap Desai Cc: Sumit Saxena Cc: Sreekanth Reddy Cc: Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index d8c57a0a518f..528f19f782f2 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -475,7 +475,7 @@ int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc) * @op_reply_q: op_reply_qinfo object * @reply_ci: operational reply descriptor's queue consumer index * - * Returns reply descriptor frame address + * Returns: reply descriptor frame address */ static inline struct mpi3_default_reply_descriptor * mpi3mr_get_reply_desc(struct op_reply_qinfo *op_reply_q, u32 reply_ci) @@ -1063,7 +1063,6 @@ enum mpi3mr_iocstate mpi3mr_get_iocstate(struct mpi3mr_ioc *mrioc) * @mrioc: Adapter instance reference * * Free the DMA memory allocated for IOCTL handling purpose. - * * Return: None */ @@ -1106,7 +1105,6 @@ static void mpi3mr_free_ioctl_dma_memory(struct mpi3mr_ioc *mrioc) /** * mpi3mr_alloc_ioctl_dma_memory - Alloc memory for ioctl dma * @mrioc: Adapter instance reference - * * This function allocates dmaable memory required to handle the * application issued MPI3 IOCTL requests. @@ -1241,7 +1239,7 @@ static int mpi3mr_issue_and_process_mur(struct mpi3mr_ioc *mrioc, * during reset/resume * @mrioc: Adapter instance reference * - * Return zero if the new IOCFacts parameters value is compatible with + * Return: zero if the new IOCFacts parameters value is compatible with * older values else return -EPERM */ static int From 567a1e852e872e702b18d271a3dbce2a75efbaff Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Tue, 2 Jan 2024 00:52:45 -0800 Subject: [PATCH 08/11] scsi: fcoe: Fix unsigned comparison with zero in store_ctlr_mode() ctlr->mode is of unsigned type, it is never less than zero. Fix this by using an extra variable called 'res', to store return value from sysfs_match_string() and assign that to ctlr->mode on the success path. Fixes: edc22a7c8688 ("scsi: fcoe: Use sysfs_match_string() over fcoe_parse_mode()") Signed-off-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20240102085245.600570-1-harshit.m.mogalapalli@oracle.com Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/fcoe/fcoe_sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe_sysfs.c b/drivers/scsi/fcoe/fcoe_sysfs.c index 408a806bf4c2..c64a085a7ee2 100644 --- a/drivers/scsi/fcoe/fcoe_sysfs.c +++ b/drivers/scsi/fcoe/fcoe_sysfs.c @@ -263,6 +263,7 @@ static ssize_t store_ctlr_mode(struct device *dev, const char *buf, size_t count) { struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev); + int res; if (count > FCOE_MAX_MODENAME_LEN) return -EINVAL; @@ -279,12 +280,13 @@ static ssize_t store_ctlr_mode(struct device *dev, return -ENOTSUPP; } - ctlr->mode = sysfs_match_string(fip_conn_type_names, buf); - if (ctlr->mode < 0 || ctlr->mode == FIP_CONN_TYPE_UNKNOWN) { + res = sysfs_match_string(fip_conn_type_names, buf); + if (res < 0 || res == FIP_CONN_TYPE_UNKNOWN) { LIBFCOE_SYSFS_DBG(ctlr, "Unknown mode %s provided.\n", buf); return -EINVAL; } + ctlr->mode = res; ctlr->f->set_fcoe_ctlr_mode(ctlr); LIBFCOE_SYSFS_DBG(ctlr, "Mode changed to %s.\n", buf); From 38945c2b006b23a1a7a0c88d76e3294c6199891c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2024 21:41:55 +0300 Subject: [PATCH 09/11] scsi: fnic: unlock on error path in fnic_queuecommand() Call spin_unlock_irqrestore(&fnic->wq_copy_lock[hwq], flags) before returning. Fixes: c81df08cd294 ("scsi: fnic: Add support for multiqueue (MQ) in fnic driver") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/5360fa20-74bc-4c22-a78e-ea8b18c5410d@moroto.mountain Reviewed-by: Karan Tilak Kumar Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/fnic_scsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 4d6db4509e75..8d7fc5284293 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -546,6 +546,7 @@ int fnic_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc) if (fnic->sw_copy_wq[hwq].io_req_table[blk_mq_unique_tag_to_tag(mqtag)] != NULL) { WARN(1, "fnic<%d>: %s: hwq: %d tag 0x%x already exists\n", fnic->fnic_num, __func__, hwq, blk_mq_unique_tag_to_tag(mqtag)); + spin_unlock_irqrestore(&fnic->wq_copy_lock[hwq], flags); return SCSI_MLQUEUE_HOST_BUSY; } From 6df0e077d76bd144c533b61d6182676aae6b0a85 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 11 Jan 2024 13:05:32 +0100 Subject: [PATCH 10/11] scsi: core: Kick the requeue list after inserting when flushing When libata calls ata_link_abort() to abort all ata queued commands, it calls blk_abort_request() on the SCSI command representing each QC. This causes scsi_timeout() to be called, which calls scsi_eh_scmd_add() for each SCSI command. scsi_eh_scmd_add() sets the SCSI host to state recovery, and then adds the command to shost->eh_cmd_q. This will wake up the SCSI EH, and eventually the libata EH strategy handler will be called, which calls scsi_eh_flush_done_q() to either flush retry or flush finish each failed command. The commands that are flush retried by scsi_eh_flush_done_q() are done so using scsi_queue_insert(). Before commit 8b566edbdbfb ("scsi: core: Only kick the requeue list if necessary"), __scsi_queue_insert() called blk_mq_requeue_request() with the second argument set to true, indicating that it should always kick/run the requeue list after inserting. After commit 8b566edbdbfb ("scsi: core: Only kick the requeue list if necessary"), __scsi_queue_insert() does not kick/run the requeue list after inserting, if the current SCSI host state is recovery (which is the case in the libata example above). This optimization is probably fine in most cases, as I can only assume that most often someone will eventually kick/run the queues. However, that is not the case for scsi_eh_flush_done_q(), where we can see that the request gets inserted to the requeue list, but the queue is never started after the request has been inserted, leading to the block layer waiting for the completion of command that never gets to run. Since scsi_eh_flush_done_q() is called by SCSI EH context, the SCSI host state is most likely always in recovery when this function is called. Thus, let scsi_eh_flush_done_q() explicitly kick the requeue list after inserting a flush retry command, so that scsi_eh_flush_done_q() keeps the same behavior as before commit 8b566edbdbfb ("scsi: core: Only kick the requeue list if necessary"). Simple reproducer for the libata example above: $ hdparm -Y /dev/sda $ echo 1 > /sys/class/scsi_device/0\:0\:0\:0/device/delete Fixes: 8b566edbdbfb ("scsi: core: Only kick the requeue list if necessary") Reported-by: Kevin Locke Closes: https://lore.kernel.org/linux-scsi/ZZw3Th70wUUvCiCY@kevinlocke.name/ Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20240111120533.3612509-1-cassel@kernel.org Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 1bac12ef238e..44c1a89b717a 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -2196,15 +2196,18 @@ void scsi_eh_flush_done_q(struct list_head *done_q) struct scsi_cmnd *scmd, *next; list_for_each_entry_safe(scmd, next, done_q, eh_entry) { + struct scsi_device *sdev = scmd->device; + list_del_init(&scmd->eh_entry); - if (scsi_device_online(scmd->device) && - !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd) && - scsi_eh_should_retry_cmd(scmd)) { + if (scsi_device_online(sdev) && !scsi_noretry_cmd(scmd) && + scsi_cmd_retry_allowed(scmd) && + scsi_eh_should_retry_cmd(scmd)) { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "%s: flush retry cmd\n", current->comm)); scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); + blk_mq_kick_requeue_list(sdev->request_queue); } else { /* * If just we got sense for the device (called From 83ab68168a3d990d5ff39ab030ad5754cbbccb25 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Thu, 11 Jan 2024 15:59:41 +0300 Subject: [PATCH 11/11] scsi: target: core: Add TMF to tmr_list handling An abort that is responded to by iSCSI itself is added to tmr_list but does not go to target core. A LUN_RESET that goes through tmr_list takes a refcounter on the abort and waits for completion. However, the abort will be never complete because it was not started in target core. Unable to locate ITT: 0x05000000 on CID: 0 Unable to locate RefTaskTag: 0x05000000 on CID: 0. wait_for_tasks: Stopping tmf LUN_RESET with tag 0x0 ref_task_tag 0x0 i_state 34 t_state ISTATE_PROCESSING refcnt 2 transport_state active,stop,fabric_stop wait for tasks: tmf LUN_RESET with tag 0x0 ref_task_tag 0x0 i_state 34 t_state ISTATE_PROCESSING refcnt 2 transport_state active,stop,fabric_stop ... INFO: task kworker/0:2:49 blocked for more than 491 seconds. task:kworker/0:2 state:D stack: 0 pid: 49 ppid: 2 flags:0x00000800 Workqueue: events target_tmr_work [target_core_mod] Call Trace: __switch_to+0x2c4/0x470 _schedule+0x314/0x1730 schedule+0x64/0x130 schedule_timeout+0x168/0x430 wait_for_completion+0x140/0x270 target_put_cmd_and_wait+0x64/0xb0 [target_core_mod] core_tmr_lun_reset+0x30/0xa0 [target_core_mod] target_tmr_work+0xc8/0x1b0 [target_core_mod] process_one_work+0x2d4/0x5d0 worker_thread+0x78/0x6c0 To fix this, only add abort to tmr_list if it will be handled by target core. Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20240111125941.8688-1-d.bogdanov@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_device.c | 5 ----- drivers/target/target_core_transport.c | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 506193e870c4..7a85e6477e46 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -147,7 +147,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd) struct se_session *se_sess = se_cmd->se_sess; struct se_node_acl *nacl = se_sess->se_node_acl; struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; - unsigned long flags; rcu_read_lock(); deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun); @@ -178,10 +177,6 @@ out_unlock: se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev); - spin_lock_irqsave(&se_tmr->tmr_dev->se_tmr_lock, flags); - list_add_tail(&se_tmr->tmr_list, &se_tmr->tmr_dev->dev_tmr_list); - spin_unlock_irqrestore(&se_tmr->tmr_dev->se_tmr_lock, flags); - return 0; } EXPORT_SYMBOL(transport_lookup_tmr_lun); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 670cfb7bd426..73d0d6133ac8 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3629,6 +3629,10 @@ int transport_generic_handle_tmr( unsigned long flags; bool aborted = false; + spin_lock_irqsave(&cmd->se_dev->se_tmr_lock, flags); + list_add_tail(&cmd->se_tmr_req->tmr_list, &cmd->se_dev->dev_tmr_list); + spin_unlock_irqrestore(&cmd->se_dev->se_tmr_lock, flags); + spin_lock_irqsave(&cmd->t_state_lock, flags); if (cmd->transport_state & CMD_T_ABORTED) { aborted = true;