scsi: lpfc: Skip waiting for register ready bits when in unrecoverable state

During tolerance tests that force an HBA to become unresponsive, rmmod
hangs resulting in the inability to remove the driver.

The lpfc_pci_remove_one_s4() routine attempts to submit a clean up mailbox
command via the lpfc_sli4_post_sync_mbox() routine, but ends up waiting
forever for a mailbox register to set its ready bit.  Because the HBA is in
an unrecoverable and unresponsive state, the ready bit will never be set.

Create a new routine called lpfc_sli4_unrecoverable_port(), which checks a
port status register's error notification bits.

Use the lpfc_sli4_unrecoverable_port() routine in ready bit check routines
to early return error if port is deemed unrecoverable.

Also, when the lpfc_handle_eratt_s4() handler detects an unrecoverable
state, call the lpfc_sli4_offline_eratt() routine to kick off flushing
outstanding I/O.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20230301231626.9621-8-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
Justin Tee 2023-03-01 15:16:23 -08:00 committed by Martin K. Petersen
parent db651ec225
commit 27c2bcf00a
5 changed files with 49 additions and 5 deletions

View file

@ -1644,6 +1644,12 @@ lpfc_sli4_pdev_status_reg_wait(struct lpfc_hba *phba)
!bf_get(lpfc_sliport_status_err, &portstat_reg))
return -EPERM;
/* There is no point to wait if the port is in an unrecoverable
* state.
*/
if (lpfc_sli4_unrecoverable_port(&portstat_reg))
return -EIO;
/* wait for the SLI port firmware ready after firmware reset */
for (i = 0; i < LPFC_FW_RESET_MAXIMUM_WAIT_10MS_CNT; i++) {
msleep(10);

View file

@ -2148,7 +2148,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
/* fall through for not able to recover */
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
"3152 Unrecoverable error\n");
phba->link_state = LPFC_HBA_ERROR;
lpfc_sli4_offline_eratt(phba);
break;
case LPFC_SLI_INTF_IF_TYPE_1:
default:
@ -9567,8 +9567,7 @@ lpfc_sli4_post_status_check(struct lpfc_hba *phba)
/* Final checks. The port status should be clean. */
if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr,
&reg_data.word0) ||
(bf_get(lpfc_sliport_status_err, &reg_data) &&
!bf_get(lpfc_sliport_status_rn, &reg_data))) {
lpfc_sli4_unrecoverable_port(&reg_data)) {
phba->work_status[0] =
readl(phba->sli4_hba.u.if_type2.
ERR1regaddr);

View file

@ -2265,6 +2265,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
}
if (!vport->localport ||
test_bit(HBA_PCI_ERR, &vport->phba->bit_flags) ||
phba->link_state == LPFC_HBA_ERROR ||
vport->load_flag & FC_UNLOADING)
return;
@ -2630,7 +2631,8 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
* return values is ignored. The upcall is a courtesy to the
* transport.
*/
if (vport->load_flag & FC_UNLOADING)
if (vport->load_flag & FC_UNLOADING ||
unlikely(vport->phba->link_state == LPFC_HBA_ERROR))
(void)nvme_fc_set_remoteport_devloss(remoteport, 0);
ret = nvme_fc_unregister_remoteport(remoteport);

View file

@ -9895,7 +9895,8 @@ lpfc_sli4_async_mbox_unblock(struct lpfc_hba *phba)
* port for twice the regular mailbox command timeout value.
*
* 0 - no timeout on waiting for bootstrap mailbox register ready.
* MBXERR_ERROR - wait for bootstrap mailbox register timed out.
* MBXERR_ERROR - wait for bootstrap mailbox register timed out or port
* is in an unrecoverable state.
**/
static int
lpfc_sli4_wait_bmbx_ready(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
@ -9903,6 +9904,23 @@ lpfc_sli4_wait_bmbx_ready(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
uint32_t db_ready;
unsigned long timeout;
struct lpfc_register bmbx_reg;
struct lpfc_register portstat_reg = {-1};
/* Sanity check - there is no point to wait if the port is in an
* unrecoverable state.
*/
if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >=
LPFC_SLI_INTF_IF_TYPE_2) {
if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr,
&portstat_reg.word0) ||
lpfc_sli4_unrecoverable_port(&portstat_reg)) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3858 Skipping bmbx ready because "
"Port Status x%x\n",
portstat_reg.word0);
return MBXERR_ERROR;
}
}
timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, mboxq)
* 1000) + jiffies;

View file

@ -1180,3 +1180,22 @@ static inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
return q->q_pgs[idx / q->entry_cnt_per_pg] +
(q->entry_size * (idx % q->entry_cnt_per_pg));
}
/**
* lpfc_sli4_unrecoverable_port - Check ERR and RN bits in portstat_reg
* @portstat_reg: portstat_reg pointer containing portstat_reg contents
*
* Description:
* Use only for SLI4 interface type-2 or later. If ERR is set && RN is 0, then
* port is deemed unrecoverable.
*
* Returns:
* true - ERR && !RN
* false - otherwise
*/
static inline bool
lpfc_sli4_unrecoverable_port(struct lpfc_register *portstat_reg)
{
return bf_get(lpfc_sliport_status_err, portstat_reg) &&
!bf_get(lpfc_sliport_status_rn, portstat_reg);
}