From 0b0747d507bffb827e40fc0f9fb5883fffc23477 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 28 Aug 2023 15:10:18 -0700 Subject: [PATCH 01/16] scsi: megaraid_sas: Fix deadlock on firmware crashdump The following processes run into a deadlock. CPU 41 was waiting for CPU 29 to handle a CSD request while holding spinlock "crashdump_lock", but CPU 29 was hung by that spinlock with IRQs disabled. PID: 17360 TASK: ffff95c1090c5c40 CPU: 41 COMMAND: "mrdiagd" !# 0 [ffffb80edbf37b58] __read_once_size at ffffffff9b871a40 include/linux/compiler.h:185:0 !# 1 [ffffb80edbf37b58] atomic_read at ffffffff9b871a40 arch/x86/include/asm/atomic.h:27:0 !# 2 [ffffb80edbf37b58] dump_stack at ffffffff9b871a40 lib/dump_stack.c:54:0 # 3 [ffffb80edbf37b78] csd_lock_wait_toolong at ffffffff9b131ad5 kernel/smp.c:364:0 # 4 [ffffb80edbf37b78] __csd_lock_wait at ffffffff9b131ad5 kernel/smp.c:384:0 # 5 [ffffb80edbf37bf8] csd_lock_wait at ffffffff9b13267a kernel/smp.c:394:0 # 6 [ffffb80edbf37bf8] smp_call_function_many at ffffffff9b13267a kernel/smp.c:843:0 # 7 [ffffb80edbf37c50] smp_call_function at ffffffff9b13279d kernel/smp.c:867:0 # 8 [ffffb80edbf37c50] on_each_cpu at ffffffff9b13279d kernel/smp.c:976:0 # 9 [ffffb80edbf37c78] flush_tlb_kernel_range at ffffffff9b085c4b arch/x86/mm/tlb.c:742:0 #10 [ffffb80edbf37cb8] __purge_vmap_area_lazy at ffffffff9b23a1e0 mm/vmalloc.c:701:0 #11 [ffffb80edbf37ce0] try_purge_vmap_area_lazy at ffffffff9b23a2cc mm/vmalloc.c:722:0 #12 [ffffb80edbf37ce0] free_vmap_area_noflush at ffffffff9b23a2cc mm/vmalloc.c:754:0 #13 [ffffb80edbf37cf8] free_unmap_vmap_area at ffffffff9b23bb3b mm/vmalloc.c:764:0 #14 [ffffb80edbf37cf8] remove_vm_area at ffffffff9b23bb3b mm/vmalloc.c:1509:0 #15 [ffffb80edbf37d18] __vunmap at ffffffff9b23bb8a mm/vmalloc.c:1537:0 #16 [ffffb80edbf37d40] vfree at ffffffff9b23bc85 mm/vmalloc.c:1612:0 #17 [ffffb80edbf37d58] megasas_free_host_crash_buffer [megaraid_sas] at ffffffffc020b7f2 drivers/scsi/megaraid/megaraid_sas_fusion.c:3932:0 #18 [ffffb80edbf37d80] fw_crash_state_store [megaraid_sas] at ffffffffc01f804d drivers/scsi/megaraid/megaraid_sas_base.c:3291:0 #19 [ffffb80edbf37dc0] dev_attr_store at ffffffff9b56dd7b drivers/base/core.c:758:0 #20 [ffffb80edbf37dd0] sysfs_kf_write at ffffffff9b326acf fs/sysfs/file.c:144:0 #21 [ffffb80edbf37de0] kernfs_fop_write at ffffffff9b325fd4 fs/kernfs/file.c:316:0 #22 [ffffb80edbf37e20] __vfs_write at ffffffff9b29418a fs/read_write.c:480:0 #23 [ffffb80edbf37ea8] vfs_write at ffffffff9b294462 fs/read_write.c:544:0 #24 [ffffb80edbf37ee8] SYSC_write at ffffffff9b2946ec fs/read_write.c:590:0 #25 [ffffb80edbf37ee8] SyS_write at ffffffff9b2946ec fs/read_write.c:582:0 #26 [ffffb80edbf37f30] do_syscall_64 at ffffffff9b003ca9 arch/x86/entry/common.c:298:0 #27 [ffffb80edbf37f58] entry_SYSCALL_64 at ffffffff9ba001b1 arch/x86/entry/entry_64.S:238:0 PID: 17355 TASK: ffff95c1090c3d80 CPU: 29 COMMAND: "mrdiagd" !# 0 [ffffb80f2d3c7d30] __read_once_size at ffffffff9b0f2ab0 include/linux/compiler.h:185:0 !# 1 [ffffb80f2d3c7d30] native_queued_spin_lock_slowpath at ffffffff9b0f2ab0 kernel/locking/qspinlock.c:368:0 # 2 [ffffb80f2d3c7d58] pv_queued_spin_lock_slowpath at ffffffff9b0f244b arch/x86/include/asm/paravirt.h:674:0 # 3 [ffffb80f2d3c7d58] queued_spin_lock_slowpath at ffffffff9b0f244b arch/x86/include/asm/qspinlock.h:53:0 # 4 [ffffb80f2d3c7d68] queued_spin_lock at ffffffff9b8961a6 include/asm-generic/qspinlock.h:90:0 # 5 [ffffb80f2d3c7d68] do_raw_spin_lock_flags at ffffffff9b8961a6 include/linux/spinlock.h:173:0 # 6 [ffffb80f2d3c7d68] __raw_spin_lock_irqsave at ffffffff9b8961a6 include/linux/spinlock_api_smp.h:122:0 # 7 [ffffb80f2d3c7d68] _raw_spin_lock_irqsave at ffffffff9b8961a6 kernel/locking/spinlock.c:160:0 # 8 [ffffb80f2d3c7d88] fw_crash_buffer_store [megaraid_sas] at ffffffffc01f8129 drivers/scsi/megaraid/megaraid_sas_base.c:3205:0 # 9 [ffffb80f2d3c7dc0] dev_attr_store at ffffffff9b56dd7b drivers/base/core.c:758:0 #10 [ffffb80f2d3c7dd0] sysfs_kf_write at ffffffff9b326acf fs/sysfs/file.c:144:0 #11 [ffffb80f2d3c7de0] kernfs_fop_write at ffffffff9b325fd4 fs/kernfs/file.c:316:0 #12 [ffffb80f2d3c7e20] __vfs_write at ffffffff9b29418a fs/read_write.c:480:0 #13 [ffffb80f2d3c7ea8] vfs_write at ffffffff9b294462 fs/read_write.c:544:0 #14 [ffffb80f2d3c7ee8] SYSC_write at ffffffff9b2946ec fs/read_write.c:590:0 #15 [ffffb80f2d3c7ee8] SyS_write at ffffffff9b2946ec fs/read_write.c:582:0 #16 [ffffb80f2d3c7f30] do_syscall_64 at ffffffff9b003ca9 arch/x86/entry/common.c:298:0 #17 [ffffb80f2d3c7f58] entry_SYSCALL_64 at ffffffff9ba001b1 arch/x86/entry/entry_64.S:238:0 The lock is used to synchronize different sysfs operations, it doesn't protect any resource that will be touched by an interrupt. Consequently it's not required to disable IRQs. Replace the spinlock with a mutex to fix the deadlock. Signed-off-by: Junxiao Bi Link: https://lore.kernel.org/r/20230828221018.19471-1-junxiao.bi@oracle.com Reviewed-by: Mike Christie Cc: stable@vger.kernel.org Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 2 +- drivers/scsi/megaraid/megaraid_sas_base.c | 21 +++++++++------------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 3554f6b07727..94abba57582d 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2332,7 +2332,7 @@ struct megasas_instance { u32 support_morethan256jbod; /* FW support for more than 256 PD/JBOD */ bool use_seqnum_jbod_fp; /* Added for PD sequence */ bool smp_affinity_enable; - spinlock_t crashdump_lock; + struct mutex crashdump_lock; struct megasas_register_set __iomem *reg_set; u32 __iomem *reply_post_host_index_addr[MR_MAX_MSIX_REG_ARRAY]; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index b9d46dcb5210..e1aa667dae66 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3271,14 +3271,13 @@ fw_crash_buffer_store(struct device *cdev, struct megasas_instance *instance = (struct megasas_instance *) shost->hostdata; int val = 0; - unsigned long flags; if (kstrtoint(buf, 0, &val) != 0) return -EINVAL; - spin_lock_irqsave(&instance->crashdump_lock, flags); + mutex_lock(&instance->crashdump_lock); instance->fw_crash_buffer_offset = val; - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return strlen(buf); } @@ -3293,24 +3292,23 @@ fw_crash_buffer_show(struct device *cdev, unsigned long dmachunk = CRASH_DMA_BUF_SIZE; unsigned long chunk_left_bytes; unsigned long src_addr; - unsigned long flags; u32 buff_offset; - spin_lock_irqsave(&instance->crashdump_lock, flags); + mutex_lock(&instance->crashdump_lock); buff_offset = instance->fw_crash_buffer_offset; if (!instance->crash_dump_buf || !((instance->fw_crash_state == AVAILABLE) || (instance->fw_crash_state == COPYING))) { dev_err(&instance->pdev->dev, "Firmware crash dump is not available\n"); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return -EINVAL; } if (buff_offset > (instance->fw_crash_buffer_size * dmachunk)) { dev_err(&instance->pdev->dev, "Firmware crash dump offset is out of range\n"); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return 0; } @@ -3322,7 +3320,7 @@ fw_crash_buffer_show(struct device *cdev, src_addr = (unsigned long)instance->crash_buf[buff_offset / dmachunk] + (buff_offset % dmachunk); memcpy(buf, (void *)src_addr, size); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return size; } @@ -3347,7 +3345,6 @@ fw_crash_state_store(struct device *cdev, struct megasas_instance *instance = (struct megasas_instance *) shost->hostdata; int val = 0; - unsigned long flags; if (kstrtoint(buf, 0, &val) != 0) return -EINVAL; @@ -3361,9 +3358,9 @@ fw_crash_state_store(struct device *cdev, instance->fw_crash_state = val; if ((val == COPIED) || (val == COPY_ERROR)) { - spin_lock_irqsave(&instance->crashdump_lock, flags); + mutex_lock(&instance->crashdump_lock); megasas_free_host_crash_buffer(instance); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); if (val == COPY_ERROR) dev_info(&instance->pdev->dev, "application failed to " "copy Firmware crash dump\n"); @@ -7422,7 +7419,7 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance) init_waitqueue_head(&instance->int_cmd_wait_q); init_waitqueue_head(&instance->abort_cmd_wait_q); - spin_lock_init(&instance->crashdump_lock); + mutex_init(&instance->crashdump_lock); spin_lock_init(&instance->mfi_pool_lock); spin_lock_init(&instance->hba_lock); spin_lock_init(&instance->stream_lock); From 31a0865bf593e59c4433a3624b4c87c40049ed9a Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Wed, 30 Aug 2023 23:19:42 -0600 Subject: [PATCH 02/16] scsi: ppa: Fix accidentally reversed conditions for 16-bit and 32-bit EPP The conditions were correct in the ppa_in() function but not in the ppa_out() function. Fixes: 68a4f84a17c1 ("scsi: ppa: Add a module parameter for the transfer mode") Signed-off-by: Alex Henrie Link: https://lore.kernel.org/r/20230831051945.515476-1-alexhenrie24@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/ppa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c index 19f0b93fa3d8..d592ee9170c1 100644 --- a/drivers/scsi/ppa.c +++ b/drivers/scsi/ppa.c @@ -307,9 +307,9 @@ static int ppa_out(ppa_struct *dev, char *buffer, int len) case PPA_EPP_8: epp_reset(ppb); w_ctr(ppb, 0x4); - if (dev->mode == PPA_EPP_32 && !(((long) buffer | len) & 0x01)) + if (dev->mode == PPA_EPP_32 && !(((long) buffer | len) & 0x03)) outsl(ppb + 4, buffer, len >> 2); - else if (dev->mode == PPA_EPP_16 && !(((long) buffer | len) & 0x03)) + else if (dev->mode == PPA_EPP_16 && !(((long) buffer | len) & 0x01)) outsw(ppb + 4, buffer, len >> 1); else outsb(ppb + 4, buffer, len); From 0be7592885d7b4c20595c388adc13930b653b847 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Thu, 31 Aug 2023 16:51:45 +0530 Subject: [PATCH 03/16] scsi: qla2xxx: Correct endianness for rqstlen and rsplen rqstlen and rsplen were changed to __le32 to fix sparse warnings: drivers/scsi/qla2xxx/qla_nvme.c:402:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:402:30: expected restricted __le32 [usertype] cmd_len drivers/scsi/qla2xxx/qla_nvme.c:402:30: got unsigned short [usertype] rsplen drivers/scsi/qla2xxx/qla_nvme.c:507:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:507:30: expected restricted __le32 [usertype] cmd_len drivers/scsi/qla2xxx/qla_nvme.c:507:30: got unsigned int [usertype] rqstlen drivers/scsi/qla2xxx/qla_nvme.c:508:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:508:30: expected restricted __le32 [usertype] rsp_len drivers/scsi/qla2xxx/qla_nvme.c:508:30: got unsigned int [usertype] rsplen Correct the endianness in qla2xxx driver thus avoiding changes in nvme-fc-driver.h. Fixes: 875386b98857 ("scsi: qla2xxx: Add Unsolicited LS Request and Response Support for NVMe") Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230831112146.32595-1-njavali@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_nvme.c | 10 +++++----- include/linux/nvme-fc-driver.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index db753d712991..a8ddf356e662 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -399,14 +399,14 @@ static int qla_nvme_xmt_ls_rsp(struct nvme_fc_local_port *lport, nvme->u.nvme.dl = 0; nvme->u.nvme.timeout_sec = 0; nvme->u.nvme.cmd_dma = fd_resp->rspdma; - nvme->u.nvme.cmd_len = fd_resp->rsplen; + nvme->u.nvme.cmd_len = cpu_to_le32(fd_resp->rsplen); nvme->u.nvme.rsp_len = 0; nvme->u.nvme.rsp_dma = 0; nvme->u.nvme.exchange_address = uctx->exchange_address; nvme->u.nvme.nport_handle = uctx->nport_handle; nvme->u.nvme.ox_id = uctx->ox_id; dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma, - le32_to_cpu(fd_resp->rsplen), DMA_TO_DEVICE); + fd_resp->rsplen, DMA_TO_DEVICE); ql_dbg(ql_dbg_unsol, vha, 0x2122, "Unsol lsreq portid=%06x %8phC exchange_address 0x%x ox_id 0x%x hdl 0x%x\n", @@ -504,13 +504,13 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, nvme->u.nvme.desc = fd; nvme->u.nvme.dir = 0; nvme->u.nvme.dl = 0; - nvme->u.nvme.cmd_len = fd->rqstlen; - nvme->u.nvme.rsp_len = fd->rsplen; + nvme->u.nvme.cmd_len = cpu_to_le32(fd->rqstlen); + nvme->u.nvme.rsp_len = cpu_to_le32(fd->rsplen); nvme->u.nvme.rsp_dma = fd->rspdma; nvme->u.nvme.timeout_sec = fd->timeout; nvme->u.nvme.cmd_dma = fd->rqstdma; dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma, - le32_to_cpu(fd->rqstlen), DMA_TO_DEVICE); + fd->rqstlen, DMA_TO_DEVICE); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index f6ef8cf5d774..4109f1bd6128 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -53,10 +53,10 @@ struct nvmefc_ls_req { void *rqstaddr; dma_addr_t rqstdma; - __le32 rqstlen; + u32 rqstlen; void *rspaddr; dma_addr_t rspdma; - __le32 rsplen; + u32 rsplen; u32 timeout; void *private; @@ -120,7 +120,7 @@ struct nvmefc_ls_req { struct nvmefc_ls_rsp { void *rspbuf; dma_addr_t rspdma; - __le32 rsplen; + u16 rsplen; void (*done)(struct nvmefc_ls_rsp *rsp); void *nvme_fc_private; /* LLDD is not to access !! */ From 59f10a05b5c7b675256a66e3161741239889ff80 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Thu, 31 Aug 2023 16:51:46 +0530 Subject: [PATCH 04/16] scsi: qla2xxx: Use raw_smp_processor_id() instead of smp_processor_id() The following call trace was observed: localhost kernel: nvme nvme0: NVME-FC{0}: controller connect complete localhost kernel: BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u129:4/75092 localhost kernel: nvme nvme0: NVME-FC{0}: new ctrl: NQN "nqn.1992-08.com.netapp:sn.b42d198afb4d11ecad6d00a098d6abfa:subsystem.PR_Channel2022_RH84_subsystem_291" localhost kernel: caller is qla_nvme_post_cmd+0x216/0x1380 [qla2xxx] localhost kernel: CPU: 6 PID: 75092 Comm: kworker/u129:4 Kdump: loaded Tainted: G B W OE --------- --- 5.14.0-70.22.1.el9_0.x86_64+debug #1 localhost kernel: Hardware name: HPE ProLiant XL420 Gen10/ProLiant XL420 Gen10, BIOS U39 01/13/2022 localhost kernel: Workqueue: nvme-wq nvme_async_event_work [nvme_core] localhost kernel: Call Trace: localhost kernel: dump_stack_lvl+0x57/0x7d localhost kernel: check_preemption_disabled+0xc8/0xd0 localhost kernel: qla_nvme_post_cmd+0x216/0x1380 [qla2xxx] Use raw_smp_processor_id() instead of smp_processor_id(). Also use queue_work() across the driver instead of queue_work_on() thus avoiding usage of smp_processor_id() when CONFIG_DEBUG_PREEMPT is enabled. Cc: stable@vger.kernel.org Suggested-by: John Garry Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230831112146.32595-2-njavali@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_inline.h | 2 +- drivers/scsi/qla2xxx/qla_isr.c | 6 +++--- drivers/scsi/qla2xxx/qla_target.c | 3 +-- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 4 ++-- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 0556969f6dc1..a4a56ab0ba74 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -577,7 +577,7 @@ fcport_is_bigger(fc_port_t *fcport) static inline struct qla_qpair * qla_mapq_nvme_select_qpair(struct qla_hw_data *ha, struct qla_qpair *qpair) { - int cpuid = smp_processor_id(); + int cpuid = raw_smp_processor_id(); if (qpair->cpuid != cpuid && ha->qp_cpu_map[cpuid]) { diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index e98788191897..d48007e18288 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3965,7 +3965,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, if (!ha->flags.fw_started) return; - if (rsp->qpair->cpuid != smp_processor_id() || !rsp->qpair->rcv_intr) { + if (rsp->qpair->cpuid != raw_smp_processor_id() || !rsp->qpair->rcv_intr) { rsp->qpair->rcv_intr = 1; if (!rsp->qpair->cpu_mapped) @@ -4468,7 +4468,7 @@ qla2xxx_msix_rsp_q(int irq, void *dev_id) } ha = qpair->hw; - queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work); + queue_work(ha->wq, &qpair->q_work); return IRQ_HANDLED; } @@ -4494,7 +4494,7 @@ qla2xxx_msix_rsp_q_hs(int irq, void *dev_id) wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_INT); spin_unlock_irqrestore(&ha->hardware_lock, flags); - queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work); + queue_work(ha->wq, &qpair->q_work); return IRQ_HANDLED; } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 2b815a9928ea..2ef2dbac0db2 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -4425,8 +4425,7 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, queue_work_on(cmd->se_cmd.cpuid, qla_tgt_wq, &cmd->work); } else if (ha->msix_count) { if (cmd->atio.u.isp24.fcp_cmnd.rddata) - queue_work_on(smp_processor_id(), qla_tgt_wq, - &cmd->work); + queue_work(qla_tgt_wq, &cmd->work); else queue_work_on(cmd->se_cmd.cpuid, qla_tgt_wq, &cmd->work); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 3b5ba4b47b3b..68a0e6a2fb6e 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -310,7 +310,7 @@ static void tcm_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd) cmd->trc_flags |= TRC_CMD_DONE; INIT_WORK(&cmd->work, tcm_qla2xxx_complete_free); - queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work); + queue_work(tcm_qla2xxx_free_wq, &cmd->work); } /* @@ -547,7 +547,7 @@ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd) cmd->trc_flags |= TRC_DATA_IN; cmd->cmd_in_wq = 1; INIT_WORK(&cmd->work, tcm_qla2xxx_handle_data_work); - queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work); + queue_work(tcm_qla2xxx_free_wq, &cmd->work); } static int tcm_qla2xxx_chk_dif_tags(uint32_t tag) From d0b0822e32dbae80bbcb3cc86f34d28539d913df Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Thu, 31 Aug 2023 22:09:29 +0800 Subject: [PATCH 05/16] scsi: qla2xxx: Fix NULL vs IS_ERR() bug for debugfs_create_dir() Since both debugfs_create_dir() and debugfs_create_file() return ERR_PTR and never NULL, use IS_ERR() instead of checking for NULL. Fixes: 1e98fb0f9208 ("scsi: qla2xxx: Setup debugfs entries for remote ports") Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/r/20230831140930.3166359-1-ruanjinjie@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index f060e593685d..a7a364760b80 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -116,7 +116,7 @@ qla2x00_dfs_create_rport(scsi_qla_host_t *vha, struct fc_port *fp) sprintf(wwn, "pn-%016llx", wwn_to_u64(fp->port_name)); fp->dfs_rport_dir = debugfs_create_dir(wwn, vha->dfs_rport_root); - if (!fp->dfs_rport_dir) + if (IS_ERR(fp->dfs_rport_dir)) return; if (NVME_TARGET(vha->hw, fp)) debugfs_create_file("dev_loss_tmo", 0600, fp->dfs_rport_dir, @@ -708,14 +708,14 @@ create_nodes: if (IS_QLA27XX(ha) || IS_QLA83XX(ha) || IS_QLA28XX(ha)) { ha->tgt.dfs_naqp = debugfs_create_file("naqp", 0400, ha->dfs_dir, vha, &dfs_naqp_ops); - if (!ha->tgt.dfs_naqp) { + if (IS_ERR(ha->tgt.dfs_naqp)) { ql_log(ql_log_warn, vha, 0xd011, "Unable to create debugFS naqp node.\n"); goto out; } } vha->dfs_rport_root = debugfs_create_dir("rports", ha->dfs_dir); - if (!vha->dfs_rport_root) { + if (IS_ERR(vha->dfs_rport_root)) { ql_log(ql_log_warn, vha, 0xd012, "Unable to create debugFS rports node.\n"); goto out; From 5c584fe6098ae1727650acbabdef0669cefec7be Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Thu, 31 Aug 2023 14:36:38 +0000 Subject: [PATCH 06/16] scsi: target: Replace strlcpy() with strscpy() strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). Direct replacement is safe here since return value of -errno is used to check for truncation instead of sizeof(dest). [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Signed-off-by: Azeem Shaikh Link: https://lore.kernel.org/r/20230831143638.232596-1-azeemshaikh38@gmail.com Reviewed-by: Kees Cook Signed-off-by: Martin K. Petersen --- drivers/target/target_core_configfs.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 936e5ff1b209..d5860c1c1f46 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -1392,16 +1392,16 @@ static ssize_t target_wwn_vendor_id_store(struct config_item *item, /* +2 to allow for a trailing (stripped) '\n' and null-terminator */ unsigned char buf[INQUIRY_VENDOR_LEN + 2]; char *stripped = NULL; - size_t len; + ssize_t len; ssize_t ret; - len = strlcpy(buf, page, sizeof(buf)); - if (len < sizeof(buf)) { + len = strscpy(buf, page, sizeof(buf)); + if (len > 0) { /* Strip any newline added from userspace. */ stripped = strstrip(buf); len = strlen(stripped); } - if (len > INQUIRY_VENDOR_LEN) { + if (len < 0 || len > INQUIRY_VENDOR_LEN) { pr_err("Emulated T10 Vendor Identification exceeds" " INQUIRY_VENDOR_LEN: " __stringify(INQUIRY_VENDOR_LEN) "\n"); @@ -1448,16 +1448,16 @@ static ssize_t target_wwn_product_id_store(struct config_item *item, /* +2 to allow for a trailing (stripped) '\n' and null-terminator */ unsigned char buf[INQUIRY_MODEL_LEN + 2]; char *stripped = NULL; - size_t len; + ssize_t len; ssize_t ret; - len = strlcpy(buf, page, sizeof(buf)); - if (len < sizeof(buf)) { + len = strscpy(buf, page, sizeof(buf)); + if (len > 0) { /* Strip any newline added from userspace. */ stripped = strstrip(buf); len = strlen(stripped); } - if (len > INQUIRY_MODEL_LEN) { + if (len < 0 || len > INQUIRY_MODEL_LEN) { pr_err("Emulated T10 Vendor exceeds INQUIRY_MODEL_LEN: " __stringify(INQUIRY_MODEL_LEN) "\n"); @@ -1504,16 +1504,16 @@ static ssize_t target_wwn_revision_store(struct config_item *item, /* +2 to allow for a trailing (stripped) '\n' and null-terminator */ unsigned char buf[INQUIRY_REVISION_LEN + 2]; char *stripped = NULL; - size_t len; + ssize_t len; ssize_t ret; - len = strlcpy(buf, page, sizeof(buf)); - if (len < sizeof(buf)) { + len = strscpy(buf, page, sizeof(buf)); + if (len > 0) { /* Strip any newline added from userspace. */ stripped = strstrip(buf); len = strlen(stripped); } - if (len > INQUIRY_REVISION_LEN) { + if (len < 0 || len > INQUIRY_REVISION_LEN) { pr_err("Emulated T10 Revision exceeds INQUIRY_REVISION_LEN: " __stringify(INQUIRY_REVISION_LEN) "\n"); From 7df0b2605489bef3f4223ad66f1f9bb8d50d4cd2 Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Fri, 1 Sep 2023 11:36:46 +0530 Subject: [PATCH 07/16] scsi: qedf: Add synchronization between I/O completions and abort Avoid race condition between I/O completion and abort processing by protecting the cmd_type with the rport lock. Signed-off-by: Javed Hasan Signed-off-by: Saurav Kashyap Link: https://lore.kernel.org/r/20230901060646.27885-1-skashyap@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 10 ++++++++-- drivers/scsi/qedf/qedf_main.c | 7 ++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 4750ec5789a8..10fe3383855c 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1904,6 +1904,7 @@ int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts) goto drop_rdata_kref; } + spin_lock_irqsave(&fcport->rport_lock, flags); if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags) || test_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags) || test_bit(QEDF_CMD_IN_ABORT, &io_req->flags)) { @@ -1911,17 +1912,20 @@ int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts) "io_req xid=0x%x sc_cmd=%p already in cleanup or abort processing or already completed.\n", io_req->xid, io_req->sc_cmd); rc = 1; + spin_unlock_irqrestore(&fcport->rport_lock, flags); goto drop_rdata_kref; } + /* Set the command type to abort */ + io_req->cmd_type = QEDF_ABTS; + spin_unlock_irqrestore(&fcport->rport_lock, flags); + kref_get(&io_req->refcount); xid = io_req->xid; qedf->control_requests++; qedf->packet_aborts++; - /* Set the command type to abort */ - io_req->cmd_type = QEDF_ABTS; io_req->return_scsi_cmd_on_abts = return_scsi_cmd_on_abts; set_bit(QEDF_CMD_IN_ABORT, &io_req->flags); @@ -2210,7 +2214,9 @@ process_els: refcount, fcport, fcport->rdata->ids.port_id); /* Cleanup cmds re-use the same TID as the original I/O */ + spin_lock_irqsave(&fcport->rport_lock, flags); io_req->cmd_type = QEDF_CLEANUP; + spin_unlock_irqrestore(&fcport->rport_lock, flags); io_req->return_scsi_cmd_on_abts = return_scsi_cmd_on_abts; init_completion(&io_req->cleanup_done); diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 7825765c936c..91f3f1d7098e 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2805,6 +2805,8 @@ void qedf_process_cqe(struct qedf_ctx *qedf, struct fcoe_cqe *cqe) struct qedf_ioreq *io_req; struct qedf_rport *fcport; u32 comp_type; + u8 io_comp_type; + unsigned long flags; comp_type = (cqe->cqe_data >> FCOE_CQE_CQE_TYPE_SHIFT) & FCOE_CQE_CQE_TYPE_MASK; @@ -2838,11 +2840,14 @@ void qedf_process_cqe(struct qedf_ctx *qedf, struct fcoe_cqe *cqe) return; } + spin_lock_irqsave(&fcport->rport_lock, flags); + io_comp_type = io_req->cmd_type; + spin_unlock_irqrestore(&fcport->rport_lock, flags); switch (comp_type) { case FCOE_GOOD_COMPLETION_CQE_TYPE: atomic_inc(&fcport->free_sqes); - switch (io_req->cmd_type) { + switch (io_comp_type) { case QEDF_SCSI_CMD: qedf_scsi_completion(qedf, cqe, io_req); break; From 2d3f59cf868b4a2dd678a96cd49bdd91411bd59f Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Mon, 4 Sep 2023 10:30:44 +0900 Subject: [PATCH 08/16] scsi: ufs: core: Move __ufshcd_send_uic_cmd() outside host_lock __ufshcd_send_uic_cmd() is wrapped by uic_cmd_mutex and its related contexts are accessed within the section wrapped by uic_cmd_mutex. Thus, wrapping with host_lock is redundant. Signed-off-by: Kiwoong Kim Link: https://lore.kernel.org/r/782ba5f26f0a96e58d85dff50751787d2d2a6b2b.1693790060.git.kwmad.kim@samsung.com Reviewed-by: Bart Van Assche Reviewed-by: Chanwoo Lee Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 93417518c04d..f5e66d775b10 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2392,7 +2392,6 @@ __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd, bool completion) { lockdep_assert_held(&hba->uic_cmd_mutex); - lockdep_assert_held(hba->host->host_lock); if (!ufshcd_ready_for_uic_cmd(hba)) { dev_err(hba->dev, @@ -2419,7 +2418,6 @@ __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd, int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) { int ret; - unsigned long flags; if (hba->quirks & UFSHCD_QUIRK_BROKEN_UIC_CMD) return 0; @@ -2428,9 +2426,7 @@ int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) mutex_lock(&hba->uic_cmd_mutex); ufshcd_add_delay_before_dme_cmd(hba); - spin_lock_irqsave(hba->host->host_lock, flags); ret = __ufshcd_send_uic_cmd(hba, uic_cmd, true); - spin_unlock_irqrestore(hba->host->host_lock, flags); if (!ret) ret = ufshcd_wait_for_uic_cmd(hba, uic_cmd); @@ -4133,8 +4129,8 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) wmb(); reenable_intr = true; } - ret = __ufshcd_send_uic_cmd(hba, cmd, false); spin_unlock_irqrestore(hba->host->host_lock, flags); + ret = __ufshcd_send_uic_cmd(hba, cmd, false); if (ret) { dev_err(hba->dev, "pwr ctrl cmd 0x%x with mode 0x%x uic error %d\n", From d32533d30e2119b0c0aa17596734f1f842f750df Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Mon, 4 Sep 2023 10:30:45 +0900 Subject: [PATCH 09/16] scsi: ufs: core: Poll HCS.UCRDY before issuing a UIC command With auto hibern8 enabled, UIC could be busy processing a hibern8 operation and the HCI would reports UIC not ready for a short while through HCS.UCRDY. The UFS driver doesn't currently handle this situation. The UFSHCI spec specifies UCRDY like this: whether the host controller is ready to process UIC COMMAND The 'ready' could be seen as many different meanings. If the meaning includes not processing any request from HCI, processing a hibern8 operation can be 'not ready'. In this situation, the driver needs to wait until the operations is completed. Signed-off-by: Kiwoong Kim Link: https://lore.kernel.org/r/550484ffb66300bdcec63d3e304dfd55cb432f1f.1693790060.git.kwmad.kim@samsung.com Reviewed-by: Adrian Hunter Reviewed-by: Chanwoo Lee Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f5e66d775b10..c2df07545f96 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -2299,7 +2300,11 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) */ static inline bool ufshcd_ready_for_uic_cmd(struct ufs_hba *hba) { - return ufshcd_readl(hba, REG_CONTROLLER_STATUS) & UIC_COMMAND_READY; + u32 val; + int ret = read_poll_timeout(ufshcd_readl, val, val & UIC_COMMAND_READY, + 500, UIC_CMD_TIMEOUT * 1000, false, hba, + REG_CONTROLLER_STATUS); + return ret == 0 ? true : false; } /** From 71996bb835aed58c7ec4967be1d05190a27339ec Mon Sep 17 00:00:00 2001 From: Michal Grzedzicki Date: Wed, 13 Sep 2023 08:56:10 -0700 Subject: [PATCH 10/16] scsi: pm80xx: Use phy-specific SAS address when sending PHY_START command Some cards have more than one SAS address. Using an incorrect address causes communication issues with some devices like expanders. Closes: https://lore.kernel.org/linux-kernel/A57AEA84-5CA0-403E-8053-106033C73C70@fb.com/ Signed-off-by: Michal Grzedzicki Link: https://lore.kernel.org/r/20230913155611.3183612-1-mge@meta.com Acked-by: Jack Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_hwi.c | 2 +- drivers/scsi/pm8001/pm80xx_hwi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 33053db5a713..90069c7b1642 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -4180,7 +4180,7 @@ pm8001_chip_phy_start_req(struct pm8001_hba_info *pm8001_ha, u8 phy_id) payload.sas_identify.dev_type = SAS_END_DEVICE; payload.sas_identify.initiator_bits = SAS_PROTOCOL_ALL; memcpy(payload.sas_identify.sas_addr, - pm8001_ha->sas_addr, SAS_ADDR_SIZE); + &pm8001_ha->phy[phy_id].dev_sas_addr, SAS_ADDR_SIZE); payload.sas_identify.phy_id = phy_id; return pm8001_mpi_build_cmd(pm8001_ha, 0, opcode, &payload, diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index f6857632dc7c..1b2c40b1381c 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -4671,7 +4671,7 @@ pm80xx_chip_phy_start_req(struct pm8001_hba_info *pm8001_ha, u8 phy_id) payload.sas_identify.dev_type = SAS_END_DEVICE; payload.sas_identify.initiator_bits = SAS_PROTOCOL_ALL; memcpy(payload.sas_identify.sas_addr, - &pm8001_ha->sas_addr, SAS_ADDR_SIZE); + &pm8001_ha->phy[phy_id].dev_sas_addr, SAS_ADDR_SIZE); payload.sas_identify.phy_id = phy_id; return pm8001_mpi_build_cmd(pm8001_ha, 0, opcode, &payload, From c13e7331745852d0dd7c35eabbe181cbd5b01172 Mon Sep 17 00:00:00 2001 From: Michal Grzedzicki Date: Mon, 11 Sep 2023 10:03:40 -0700 Subject: [PATCH 11/16] scsi: pm80xx: Avoid leaking tags when processing OPC_INB_SET_CONTROLLER_CONFIG command Tags allocated for OPC_INB_SET_CONTROLLER_CONFIG command need to be freed when we receive the response. Signed-off-by: Michal Grzedzicki Link: https://lore.kernel.org/r/20230911170340.699533-2-mge@meta.com Acked-by: Jack Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm80xx_hwi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 1b2c40b1381c..3afd9443c425 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -3671,10 +3671,12 @@ static int mpi_set_controller_config_resp(struct pm8001_hba_info *pm8001_ha, (struct set_ctrl_cfg_resp *)(piomb + 4); u32 status = le32_to_cpu(pPayload->status); u32 err_qlfr_pgcd = le32_to_cpu(pPayload->err_qlfr_pgcd); + u32 tag = le32_to_cpu(pPayload->tag); pm8001_dbg(pm8001_ha, MSG, "SET CONTROLLER RESP: status 0x%x qlfr_pgcd 0x%x\n", status, err_qlfr_pgcd); + pm8001_tag_free(pm8001_ha, tag); return 0; } From c91774818b041ed290df29fb1dc0725be9b12e83 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Sep 2023 08:27:36 +0900 Subject: [PATCH 12/16] scsi: pm8001: Setup IRQs on resume The function pm8001_pci_resume() only calls pm8001_request_irq() without calling pm8001_setup_irq(). This causes the IRQ allocation to fail, which leads all drives being removed from the system. Fix this issue by integrating the code for pm8001_setup_irq() directly inside pm8001_request_irq() so that MSI-X setup is performed both during normal initialization and resume operations. Fixes: dbf9bfe61571 ("[SCSI] pm8001: add SAS/SATA HBA driver") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230911232745.325149-2-dlemoal@kernel.org Acked-by: Jack Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_init.c | 51 +++++++++++-------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 5e5ce1e74c3b..443a3176c6c0 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -273,7 +273,6 @@ static irqreturn_t pm8001_interrupt_handler_intx(int irq, void *dev_id) return ret; } -static u32 pm8001_setup_irq(struct pm8001_hba_info *pm8001_ha); static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha); /** @@ -294,13 +293,6 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha, pm8001_dbg(pm8001_ha, INIT, "pm8001_alloc: PHY:%x\n", pm8001_ha->chip->n_phy); - /* Setup Interrupt */ - rc = pm8001_setup_irq(pm8001_ha); - if (rc) { - pm8001_dbg(pm8001_ha, FAIL, - "pm8001_setup_irq failed [ret: %d]\n", rc); - goto err_out; - } /* Request Interrupt */ rc = pm8001_request_irq(pm8001_ha); if (rc) @@ -1031,47 +1023,38 @@ static u32 pm8001_request_msix(struct pm8001_hba_info *pm8001_ha) } #endif -static u32 pm8001_setup_irq(struct pm8001_hba_info *pm8001_ha) -{ - struct pci_dev *pdev; - - pdev = pm8001_ha->pdev; - -#ifdef PM8001_USE_MSIX - if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) - return pm8001_setup_msix(pm8001_ha); - pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n"); -#endif - return 0; -} - /** * pm8001_request_irq - register interrupt * @pm8001_ha: our ha struct. */ static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha) { - struct pci_dev *pdev; + struct pci_dev *pdev = pm8001_ha->pdev; +#ifdef PM8001_USE_MSIX int rc; - pdev = pm8001_ha->pdev; + if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) { + rc = pm8001_setup_msix(pm8001_ha); + if (rc) { + pm8001_dbg(pm8001_ha, FAIL, + "pm8001_setup_irq failed [ret: %d]\n", rc); + return rc; + } -#ifdef PM8001_USE_MSIX - if (pdev->msix_cap && pci_msi_enabled()) - return pm8001_request_msix(pm8001_ha); - else { - pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n"); - goto intx; + if (pdev->msix_cap && pci_msi_enabled()) + return pm8001_request_msix(pm8001_ha); } + + pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n"); #endif -intx: /* initialize the INT-X interrupt */ pm8001_ha->irq_vector[0].irq_id = 0; pm8001_ha->irq_vector[0].drv_inst = pm8001_ha; - rc = request_irq(pdev->irq, pm8001_interrupt_handler_intx, IRQF_SHARED, - pm8001_ha->name, SHOST_TO_SAS_HA(pm8001_ha->shost)); - return rc; + + return request_irq(pdev->irq, pm8001_interrupt_handler_intx, + IRQF_SHARED, pm8001_ha->name, + SHOST_TO_SAS_HA(pm8001_ha->shost)); } /** From d14e3e553e05cb763964c991fe6acb0a6a1c6f9c Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Thu, 31 Aug 2023 20:34:59 +0200 Subject: [PATCH 13/16] scsi: target: core: Fix target_cmd_counter leak The target_cmd_counter struct allocated via target_alloc_cmd_counter() is never freed, resulting in leaks across various transport types, e.g.: unreferenced object 0xffff88801f920120 (size 96): comm "sh", pid 102, jiffies 4294892535 (age 713.412s) hex dump (first 32 bytes): 07 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 38 01 92 1f 80 88 ff ff ........8....... backtrace: [<00000000e58a6252>] kmalloc_trace+0x11/0x20 [<0000000043af4b2f>] target_alloc_cmd_counter+0x17/0x90 [target_core_mod] [<000000007da2dfa7>] target_setup_session+0x2d/0x140 [target_core_mod] [<0000000068feef86>] tcm_loop_tpg_nexus_store+0x19b/0x350 [tcm_loop] [<000000006a80e021>] configfs_write_iter+0xb1/0x120 [<00000000e9f4d860>] vfs_write+0x2e4/0x3c0 [<000000008143433b>] ksys_write+0x80/0xb0 [<00000000a7df29b2>] do_syscall_64+0x42/0x90 [<0000000053f45fb8>] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Free the structure alongside the corresponding iscsit_conn / se_sess parent. Signed-off-by: David Disseldorp Link: https://lore.kernel.org/r/20230831183459.6938-1-ddiss@suse.de Fixes: becd9be6069e ("scsi: target: Move sess cmd counter to new struct") Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_transport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 687adc9e086c..0686882bcbda 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -264,6 +264,7 @@ void target_free_cmd_counter(struct target_cmd_counter *cmd_cnt) percpu_ref_put(&cmd_cnt->refcnt); percpu_ref_exit(&cmd_cnt->refcnt); + kfree(cmd_cnt); } EXPORT_SYMBOL_GPL(target_free_cmd_counter); From 7dcc683db3639eadd11bf0d59a09088a43de5e22 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 6 Sep 2023 11:08:09 +0800 Subject: [PATCH 14/16] scsi: lpfc: Fix the NULL vs IS_ERR() bug for debugfs_create_file() Since debugfs_create_file() returns ERR_PTR and never NULL, use IS_ERR() to check the return value. Fixes: 2fcbc569b9f5 ("scsi: lpfc: Make debugfs ktime stats generic for NVME and SCSI") Fixes: 4c47efc140fa ("scsi: lpfc: Move SCSI and NVME Stats to hardware queue structures") Fixes: 6a828b0f6192 ("scsi: lpfc: Support non-uniform allocation of MSIX vectors to hardware queues") Fixes: 95bfc6d8ad86 ("scsi: lpfc: Make FW logging dynamically configurable") Fixes: 9f77870870d8 ("scsi: lpfc: Add debugfs support for cm framework buffers") Fixes: c490850a0947 ("scsi: lpfc: Adapt partitioned XRI lists to efficient sharing") Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/r/20230906030809.2847970-1-ruanjinjie@huawei.com Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 7f9b221e7c34..ea9b42225e62 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -6073,7 +6073,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) phba->hba_debugfs_root, phba, &lpfc_debugfs_op_multixripools); - if (!phba->debug_multixri_pools) { + if (IS_ERR(phba->debug_multixri_pools)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "0527 Cannot create debugfs multixripools\n"); goto debug_failed; @@ -6085,7 +6085,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) debugfs_create_file(name, S_IFREG | 0644, phba->hba_debugfs_root, phba, &lpfc_cgn_buffer_op); - if (!phba->debug_cgn_buffer) { + if (IS_ERR(phba->debug_cgn_buffer)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "6527 Cannot create debugfs " "cgn_buffer\n"); @@ -6098,7 +6098,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) debugfs_create_file(name, S_IFREG | 0644, phba->hba_debugfs_root, phba, &lpfc_rx_monitor_op); - if (!phba->debug_rx_monitor) { + if (IS_ERR(phba->debug_rx_monitor)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "6528 Cannot create debugfs " "rx_monitor\n"); @@ -6111,7 +6111,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) debugfs_create_file(name, 0644, phba->hba_debugfs_root, phba, &lpfc_debugfs_ras_log); - if (!phba->debug_ras_log) { + if (IS_ERR(phba->debug_ras_log)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "6148 Cannot create debugfs" " ras_log\n"); @@ -6132,7 +6132,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) debugfs_create_file(name, S_IFREG | 0644, phba->hba_debugfs_root, phba, &lpfc_debugfs_op_lockstat); - if (!phba->debug_lockstat) { + if (IS_ERR(phba->debug_lockstat)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "4610 Can't create debugfs lockstat\n"); goto debug_failed; @@ -6358,7 +6358,7 @@ nvmeio_off: debugfs_create_file(name, 0644, vport->vport_debugfs_root, vport, &lpfc_debugfs_op_scsistat); - if (!vport->debug_scsistat) { + if (IS_ERR(vport->debug_scsistat)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "4611 Cannot create debugfs scsistat\n"); goto debug_failed; @@ -6369,7 +6369,7 @@ nvmeio_off: debugfs_create_file(name, 0644, vport->vport_debugfs_root, vport, &lpfc_debugfs_op_ioktime); - if (!vport->debug_ioktime) { + if (IS_ERR(vport->debug_ioktime)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "0815 Cannot create debugfs ioktime\n"); goto debug_failed; From 9c3034968ed0feeaf72e5b549b19c7767a1a04f2 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Fri, 8 Sep 2023 14:18:52 -0700 Subject: [PATCH 15/16] scsi: lpfc: Early return after marking final NLP_DROPPED flag in dev_loss_tmo When a dev_loss_tmo event occurs, an ndlp lock is taken before checking nlp_flag for NLP_DROPPED. There is an attempt to restore the ndlp lock when exiting the if statement, but the nlp_put kref could be the final decrement causing a use-after-free memory access on a released ndlp object. Instead of trying to reacquire the ndlp lock after checking nlp_flag, just return after calling nlp_put. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20230908211852.37576-1-justintee8345@gmail.com Reviewed-by: "Ewan D. Milne" Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 51afb60859eb..674dd07aae72 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -203,7 +203,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) ndlp->nlp_flag |= NLP_DROPPED; spin_unlock_irqrestore(&ndlp->lock, iflags); lpfc_nlp_put(ndlp); - spin_lock_irqsave(&ndlp->lock, iflags); + return; } spin_unlock_irqrestore(&ndlp->lock, iflags); From dae40be7a1a72474e225795c0d6f43a4ac596a3f Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Fri, 8 Sep 2023 14:19:23 -0700 Subject: [PATCH 16/16] scsi: lpfc: Prevent use-after-free during rmmod with mapped NVMe rports During rmmod, when dev_loss_tmo callback is called, an ndlp kref count is decremented twice. Once for SCSI transport registration and second to remove the initial node allocation kref. If there is also an NVMe transport registration, another reference count decrement is expected in lpfc_nvme_unregister_port(). Race conditions between the NVMe transport remoteport_delete and dev_loss_tmo callbacks sometimes results in premature ndlp object release resulting in use-after-free issues. Fix by not dropping the ndlp object in dev_loss_tmo callback with an outstanding NVMe transport registration. Inversely, mark the final NLP_DROPPED flag in lpfc_nvme_unregister_port when rmmod flag is set. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20230908211923.37603-1-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 3 ++- drivers/scsi/lpfc/lpfc_nvme.c | 24 +++++++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 674dd07aae72..5154eeaee0ec 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -199,7 +199,8 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) /* Only 1 thread can drop the initial node reference. If * another thread has set NLP_DROPPED, this thread is done. */ - if (!(ndlp->nlp_flag & NLP_DROPPED)) { + if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD) && + !(ndlp->nlp_flag & NLP_DROPPED)) { ndlp->nlp_flag |= NLP_DROPPED; spin_unlock_irqrestore(&ndlp->lock, iflags); lpfc_nlp_put(ndlp); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 39acbcb7ec66..96e11a26c297 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -228,8 +228,7 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport) spin_unlock_irq(&ndlp->lock); /* On a devloss timeout event, one more put is executed provided the - * NVME and SCSI rport unregister requests are complete. If the vport - * is unloading, this extra put is executed by lpfc_drop_node. + * NVME and SCSI rport unregister requests are complete. */ if (!(ndlp->fc4_xpt_flags & fc4_xpt_flags)) lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM); @@ -2567,11 +2566,7 @@ lpfc_nvme_rescan_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * nvme_transport perspective. Loss of an rport just means IO cannot * be sent and recovery is completely up to the initator. * For now, the driver just unbinds the DID and port_role so that - * no further IO can be issued. Changes are planned for later. - * - * Notes - the ndlp reference count is not decremented here since - * since there is no nvme_transport api for devloss. Node ref count - * is only adjusted in driver unload. + * no further IO can be issued. */ void lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) @@ -2646,6 +2641,21 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) "6167 NVME unregister failed %d " "port_state x%x\n", ret, remoteport->port_state); + + if (vport->load_flag & FC_UNLOADING) { + /* Only 1 thread can drop the initial node + * reference. Check if another thread has set + * NLP_DROPPED. + */ + spin_lock_irq(&ndlp->lock); + if (!(ndlp->nlp_flag & NLP_DROPPED)) { + ndlp->nlp_flag |= NLP_DROPPED; + spin_unlock_irq(&ndlp->lock); + lpfc_nlp_put(ndlp); + return; + } + spin_unlock_irq(&ndlp->lock); + } } } return;