bna: IOC failure auto recovery fix

Change Details:
	- Made IOC auto_recovery synchronized and not timer based.
	- Only one PCI function will attempt to recover and reinitialize
	the ASIC on a failure, that too after all the active PCI
	functions acknowledge the IOC failure.

Signed-off-by: Debashis Dutt <ddutt@brocade.com>
Signed-off-by: Rasesh Mody <rmody@brocade.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Rasesh Mody 2010-12-23 21:45:09 +00:00 committed by David S. Miller
parent aad75b66f1
commit 1d32f76962
7 changed files with 1160 additions and 449 deletions

View file

@ -112,16 +112,18 @@ struct bfa_ioc_pci_attr {
* IOC states
*/
enum bfa_ioc_state {
BFA_IOC_RESET = 1, /*!< IOC is in reset state */
BFA_IOC_SEMWAIT = 2, /*!< Waiting for IOC h/w semaphore */
BFA_IOC_HWINIT = 3, /*!< IOC h/w is being initialized */
BFA_IOC_GETATTR = 4, /*!< IOC is being configured */
BFA_IOC_OPERATIONAL = 5, /*!< IOC is operational */
BFA_IOC_INITFAIL = 6, /*!< IOC hardware failure */
BFA_IOC_HBFAIL = 7, /*!< IOC heart-beat failure */
BFA_IOC_DISABLING = 8, /*!< IOC is being disabled */
BFA_IOC_DISABLED = 9, /*!< IOC is disabled */
BFA_IOC_FWMISMATCH = 10, /*!< IOC f/w different from drivers */
BFA_IOC_UNINIT = 1, /*!< IOC is in uninit state */
BFA_IOC_RESET = 2, /*!< IOC is in reset state */
BFA_IOC_SEMWAIT = 3, /*!< Waiting for IOC h/w semaphore */
BFA_IOC_HWINIT = 4, /*!< IOC h/w is being initialized */
BFA_IOC_GETATTR = 5, /*!< IOC is being configured */
BFA_IOC_OPERATIONAL = 6, /*!< IOC is operational */
BFA_IOC_INITFAIL = 7, /*!< IOC hardware failure */
BFA_IOC_FAIL = 8, /*!< IOC heart-beat failure */
BFA_IOC_DISABLING = 9, /*!< IOC is being disabled */
BFA_IOC_DISABLED = 10, /*!< IOC is disabled */
BFA_IOC_FWMISMATCH = 11, /*!< IOC f/w different from drivers */
BFA_IOC_ENABLING = 12, /*!< IOC is being enabled */
};
/**

File diff suppressed because it is too large Load diff

View file

@ -26,16 +26,7 @@
#define BFA_IOC_TOV 3000 /* msecs */
#define BFA_IOC_HWSEM_TOV 500 /* msecs */
#define BFA_IOC_HB_TOV 500 /* msecs */
#define BFA_IOC_HWINIT_MAX 2
#define BFA_IOC_TOV_RECOVER BFA_IOC_HB_TOV
/**
* Generic Scatter Gather Element used by driver
*/
struct bfa_sge {
u32 sg_len;
void *sg_addr;
};
#define BFA_IOC_HWINIT_MAX 5
/**
* PCI device information required by IOC
@ -64,19 +55,6 @@ struct bfa_dma {
#define BFI_SMEM_CB_SIZE 0x200000U /* ! 2MB for crossbow */
#define BFI_SMEM_CT_SIZE 0x280000U /* ! 2.5MB for catapult */
/**
* @brief BFA dma address assignment macro
*/
#define bfa_dma_addr_set(dma_addr, pa) \
__bfa_dma_addr_set(&dma_addr, (u64)pa)
static inline void
__bfa_dma_addr_set(union bfi_addr_u *dma_addr, u64 pa)
{
dma_addr->a32.addr_lo = (u32) pa;
dma_addr->a32.addr_hi = (u32) (upper_32_bits(pa));
}
/**
* @brief BFA dma address assignment macro. (big endian format)
*/
@ -105,8 +83,11 @@ struct bfa_ioc_regs {
void __iomem *host_page_num_fn;
void __iomem *heartbeat;
void __iomem *ioc_fwstate;
void __iomem *alt_ioc_fwstate;
void __iomem *ll_halt;
void __iomem *alt_ll_halt;
void __iomem *err_set;
void __iomem *ioc_fail_sync;
void __iomem *shirq_isr_next;
void __iomem *shirq_msk_next;
void __iomem *smem_page_start;
@ -165,16 +146,22 @@ struct bfa_ioc_hbfail_notify {
(__notify)->cbarg = (__cbarg); \
} while (0)
struct bfa_iocpf {
bfa_fsm_t fsm;
struct bfa_ioc *ioc;
u32 retry_count;
bool auto_recover;
};
struct bfa_ioc {
bfa_fsm_t fsm;
struct bfa *bfa;
struct bfa_pcidev pcidev;
struct bfa_timer_mod *timer_mod;
struct timer_list ioc_timer;
struct timer_list iocpf_timer;
struct timer_list sem_timer;
struct timer_list hb_timer;
u32 hb_count;
u32 retry_count;
struct list_head hb_notify_q;
void *dbg_fwsave;
int dbg_fwsave_len;
@ -182,7 +169,6 @@ struct bfa_ioc {
enum bfi_mclass ioc_mc;
struct bfa_ioc_regs ioc_regs;
struct bfa_ioc_drv_stats stats;
bool auto_recover;
bool fcmode;
bool ctdev;
bool cna;
@ -195,6 +181,7 @@ struct bfa_ioc {
struct bfa_ioc_cbfn *cbfn;
struct bfa_ioc_mbox_mod mbox_mod;
struct bfa_ioc_hwif *ioc_hwif;
struct bfa_iocpf iocpf;
};
struct bfa_ioc_hwif {
@ -205,8 +192,12 @@ struct bfa_ioc_hwif {
void (*ioc_map_port) (struct bfa_ioc *ioc);
void (*ioc_isr_mode_set) (struct bfa_ioc *ioc,
bool msix);
void (*ioc_notify_hbfail) (struct bfa_ioc *ioc);
void (*ioc_notify_fail) (struct bfa_ioc *ioc);
void (*ioc_ownership_reset) (struct bfa_ioc *ioc);
void (*ioc_sync_join) (struct bfa_ioc *ioc);
void (*ioc_sync_leave) (struct bfa_ioc *ioc);
void (*ioc_sync_ack) (struct bfa_ioc *ioc);
bool (*ioc_sync_complete) (struct bfa_ioc *ioc);
};
#define bfa_ioc_pcifn(__ioc) ((__ioc)->pcidev.pci_func)
@ -271,7 +262,6 @@ void bfa_nw_ioc_enable(struct bfa_ioc *ioc);
void bfa_nw_ioc_disable(struct bfa_ioc *ioc);
void bfa_nw_ioc_error_isr(struct bfa_ioc *ioc);
void bfa_nw_ioc_get_attr(struct bfa_ioc *ioc, struct bfa_ioc_attr *ioc_attr);
void bfa_nw_ioc_hbfail_register(struct bfa_ioc *ioc,
struct bfa_ioc_hbfail_notify *notify);
@ -289,7 +279,8 @@ mac_t bfa_nw_ioc_get_mac(struct bfa_ioc *ioc);
*/
void bfa_nw_ioc_timeout(void *ioc);
void bfa_nw_ioc_hb_check(void *ioc);
void bfa_nw_ioc_sem_timeout(void *ioc);
void bfa_nw_iocpf_timeout(void *ioc);
void bfa_nw_iocpf_sem_timeout(void *ioc);
/*
* F/W Image Size & Chunk

View file

@ -22,6 +22,15 @@
#include "bfi_ctreg.h"
#include "bfa_defs.h"
#define bfa_ioc_ct_sync_pos(__ioc) \
((u32) (1 << bfa_ioc_pcifn(__ioc)))
#define BFA_IOC_SYNC_REQD_SH 16
#define bfa_ioc_ct_get_sync_ackd(__val) (__val & 0x0000ffff)
#define bfa_ioc_ct_clear_sync_ackd(__val) (__val & 0xffff0000)
#define bfa_ioc_ct_get_sync_reqd(__val) (__val >> BFA_IOC_SYNC_REQD_SH)
#define bfa_ioc_ct_sync_reqd_pos(__ioc) \
(bfa_ioc_ct_sync_pos(__ioc) << BFA_IOC_SYNC_REQD_SH)
/*
* forward declarations
*/
@ -30,8 +39,12 @@ static void bfa_ioc_ct_firmware_unlock(struct bfa_ioc *ioc);
static void bfa_ioc_ct_reg_init(struct bfa_ioc *ioc);
static void bfa_ioc_ct_map_port(struct bfa_ioc *ioc);
static void bfa_ioc_ct_isr_mode_set(struct bfa_ioc *ioc, bool msix);
static void bfa_ioc_ct_notify_hbfail(struct bfa_ioc *ioc);
static void bfa_ioc_ct_notify_fail(struct bfa_ioc *ioc);
static void bfa_ioc_ct_ownership_reset(struct bfa_ioc *ioc);
static void bfa_ioc_ct_sync_join(struct bfa_ioc *ioc);
static void bfa_ioc_ct_sync_leave(struct bfa_ioc *ioc);
static void bfa_ioc_ct_sync_ack(struct bfa_ioc *ioc);
static bool bfa_ioc_ct_sync_complete(struct bfa_ioc *ioc);
static enum bfa_status bfa_ioc_ct_pll_init(void __iomem *rb, bool fcmode);
static struct bfa_ioc_hwif nw_hwif_ct;
@ -48,8 +61,12 @@ bfa_nw_ioc_set_ct_hwif(struct bfa_ioc *ioc)
nw_hwif_ct.ioc_reg_init = bfa_ioc_ct_reg_init;
nw_hwif_ct.ioc_map_port = bfa_ioc_ct_map_port;
nw_hwif_ct.ioc_isr_mode_set = bfa_ioc_ct_isr_mode_set;
nw_hwif_ct.ioc_notify_hbfail = bfa_ioc_ct_notify_hbfail;
nw_hwif_ct.ioc_notify_fail = bfa_ioc_ct_notify_fail;
nw_hwif_ct.ioc_ownership_reset = bfa_ioc_ct_ownership_reset;
nw_hwif_ct.ioc_sync_join = bfa_ioc_ct_sync_join;
nw_hwif_ct.ioc_sync_leave = bfa_ioc_ct_sync_leave;
nw_hwif_ct.ioc_sync_ack = bfa_ioc_ct_sync_ack;
nw_hwif_ct.ioc_sync_complete = bfa_ioc_ct_sync_complete;
ioc->ioc_hwif = &nw_hwif_ct;
}
@ -86,6 +103,7 @@ bfa_ioc_ct_firmware_lock(struct bfa_ioc *ioc)
if (usecnt == 0) {
writel(1, ioc->ioc_regs.ioc_usage_reg);
bfa_nw_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg);
writel(0, ioc->ioc_regs.ioc_fail_sync);
return true;
}
@ -149,12 +167,14 @@ bfa_ioc_ct_firmware_unlock(struct bfa_ioc *ioc)
* Notify other functions on HB failure.
*/
static void
bfa_ioc_ct_notify_hbfail(struct bfa_ioc *ioc)
bfa_ioc_ct_notify_fail(struct bfa_ioc *ioc)
{
if (ioc->cna) {
writel(__FW_INIT_HALT_P, ioc->ioc_regs.ll_halt);
writel(__FW_INIT_HALT_P, ioc->ioc_regs.alt_ll_halt);
/* Wait for halt to take effect */
readl(ioc->ioc_regs.ll_halt);
readl(ioc->ioc_regs.alt_ll_halt);
} else {
writel(__PSS_ERR_STATUS_SET, ioc->ioc_regs.err_set);
readl(ioc->ioc_regs.err_set);
@ -206,15 +226,19 @@ bfa_ioc_ct_reg_init(struct bfa_ioc *ioc)
if (ioc->port_id == 0) {
ioc->ioc_regs.heartbeat = rb + BFA_IOC0_HBEAT_REG;
ioc->ioc_regs.ioc_fwstate = rb + BFA_IOC0_STATE_REG;
ioc->ioc_regs.alt_ioc_fwstate = rb + BFA_IOC1_STATE_REG;
ioc->ioc_regs.hfn_mbox_cmd = rb + iocreg_mbcmd_p0[pcifn].hfn;
ioc->ioc_regs.lpu_mbox_cmd = rb + iocreg_mbcmd_p0[pcifn].lpu;
ioc->ioc_regs.ll_halt = rb + FW_INIT_HALT_P0;
ioc->ioc_regs.alt_ll_halt = rb + FW_INIT_HALT_P1;
} else {
ioc->ioc_regs.heartbeat = (rb + BFA_IOC1_HBEAT_REG);
ioc->ioc_regs.ioc_fwstate = (rb + BFA_IOC1_STATE_REG);
ioc->ioc_regs.alt_ioc_fwstate = rb + BFA_IOC0_STATE_REG;
ioc->ioc_regs.hfn_mbox_cmd = rb + iocreg_mbcmd_p1[pcifn].hfn;
ioc->ioc_regs.lpu_mbox_cmd = rb + iocreg_mbcmd_p1[pcifn].lpu;
ioc->ioc_regs.ll_halt = rb + FW_INIT_HALT_P1;
ioc->ioc_regs.alt_ll_halt = rb + FW_INIT_HALT_P0;
}
/*
@ -232,6 +256,7 @@ bfa_ioc_ct_reg_init(struct bfa_ioc *ioc)
ioc->ioc_regs.ioc_usage_sem_reg = (rb + HOST_SEM1_REG);
ioc->ioc_regs.ioc_init_sem_reg = (rb + HOST_SEM2_REG);
ioc->ioc_regs.ioc_usage_reg = (rb + BFA_FW_USE_COUNT);
ioc->ioc_regs.ioc_fail_sync = (rb + BFA_IOC_FAIL_SYNC);
/**
* sram memory access
@ -317,6 +342,77 @@ bfa_ioc_ct_ownership_reset(struct bfa_ioc *ioc)
bfa_nw_ioc_hw_sem_release(ioc);
}
/**
* Synchronized IOC failure processing routines
*/
static void
bfa_ioc_ct_sync_join(struct bfa_ioc *ioc)
{
u32 r32 = readl(ioc->ioc_regs.ioc_fail_sync);
u32 sync_pos = bfa_ioc_ct_sync_reqd_pos(ioc);
writel((r32 | sync_pos), ioc->ioc_regs.ioc_fail_sync);
}
static void
bfa_ioc_ct_sync_leave(struct bfa_ioc *ioc)
{
u32 r32 = readl(ioc->ioc_regs.ioc_fail_sync);
u32 sync_msk = bfa_ioc_ct_sync_reqd_pos(ioc) |
bfa_ioc_ct_sync_pos(ioc);
writel((r32 & ~sync_msk), ioc->ioc_regs.ioc_fail_sync);
}
static void
bfa_ioc_ct_sync_ack(struct bfa_ioc *ioc)
{
u32 r32 = readl(ioc->ioc_regs.ioc_fail_sync);
writel((r32 | bfa_ioc_ct_sync_pos(ioc)), ioc->ioc_regs.ioc_fail_sync);
}
static bool
bfa_ioc_ct_sync_complete(struct bfa_ioc *ioc)
{
u32 r32 = readl(ioc->ioc_regs.ioc_fail_sync);
u32 sync_reqd = bfa_ioc_ct_get_sync_reqd(r32);
u32 sync_ackd = bfa_ioc_ct_get_sync_ackd(r32);
u32 tmp_ackd;
if (sync_ackd == 0)
return true;
/**
* The check below is to see whether any other PCI fn
* has reinitialized the ASIC (reset sync_ackd bits)
* and failed again while this IOC was waiting for hw
* semaphore (in bfa_iocpf_sm_semwait()).
*/
tmp_ackd = sync_ackd;
if ((sync_reqd & bfa_ioc_ct_sync_pos(ioc)) &&
!(sync_ackd & bfa_ioc_ct_sync_pos(ioc)))
sync_ackd |= bfa_ioc_ct_sync_pos(ioc);
if (sync_reqd == sync_ackd) {
writel(bfa_ioc_ct_clear_sync_ackd(r32),
ioc->ioc_regs.ioc_fail_sync);
writel(BFI_IOC_FAIL, ioc->ioc_regs.ioc_fwstate);
writel(BFI_IOC_FAIL, ioc->ioc_regs.alt_ioc_fwstate);
return true;
}
/**
* If another PCI fn reinitialized and failed again while
* this IOC was waiting for hw sem, the sync_ackd bit for
* this IOC need to be set again to allow reinitialization.
*/
if (tmp_ackd != sync_ackd)
writel((r32 | sync_ackd), ioc->ioc_regs.ioc_fail_sync);
return false;
}
static enum bfa_status
bfa_ioc_ct_pll_init(void __iomem *rb, bool fcmode)
{

View file

@ -535,6 +535,7 @@ enum {
#define BFA_IOC1_HBEAT_REG HOST_SEM2_INFO_REG
#define BFA_IOC1_STATE_REG HOST_SEM3_INFO_REG
#define BFA_FW_USE_COUNT HOST_SEM4_INFO_REG
#define BFA_IOC_FAIL_SYNC HOST_SEM5_INFO_REG
#define CPE_DEPTH_Q(__n) \
(CPE_DEPTH_Q0 + (__n) * (CPE_DEPTH_Q1 - CPE_DEPTH_Q0))
@ -552,22 +553,30 @@ enum {
(RME_PI_PTR_Q0 + (__n) * (RME_PI_PTR_Q1 - RME_PI_PTR_Q0))
#define RME_CI_PTR_Q(__n) \
(RME_CI_PTR_Q0 + (__n) * (RME_CI_PTR_Q1 - RME_CI_PTR_Q0))
#define HQM_QSET_RXQ_DRBL_P0(__n) (HQM_QSET0_RXQ_DRBL_P0 + (__n) \
* (HQM_QSET1_RXQ_DRBL_P0 - HQM_QSET0_RXQ_DRBL_P0))
#define HQM_QSET_TXQ_DRBL_P0(__n) (HQM_QSET0_TXQ_DRBL_P0 + (__n) \
* (HQM_QSET1_TXQ_DRBL_P0 - HQM_QSET0_TXQ_DRBL_P0))
#define HQM_QSET_IB_DRBL_1_P0(__n) (HQM_QSET0_IB_DRBL_1_P0 + (__n) \
* (HQM_QSET1_IB_DRBL_1_P0 - HQM_QSET0_IB_DRBL_1_P0))
#define HQM_QSET_IB_DRBL_2_P0(__n) (HQM_QSET0_IB_DRBL_2_P0 + (__n) \
* (HQM_QSET1_IB_DRBL_2_P0 - HQM_QSET0_IB_DRBL_2_P0))
#define HQM_QSET_RXQ_DRBL_P1(__n) (HQM_QSET0_RXQ_DRBL_P1 + (__n) \
* (HQM_QSET1_RXQ_DRBL_P1 - HQM_QSET0_RXQ_DRBL_P1))
#define HQM_QSET_TXQ_DRBL_P1(__n) (HQM_QSET0_TXQ_DRBL_P1 + (__n) \
* (HQM_QSET1_TXQ_DRBL_P1 - HQM_QSET0_TXQ_DRBL_P1))
#define HQM_QSET_IB_DRBL_1_P1(__n) (HQM_QSET0_IB_DRBL_1_P1 + (__n) \
* (HQM_QSET1_IB_DRBL_1_P1 - HQM_QSET0_IB_DRBL_1_P1))
#define HQM_QSET_IB_DRBL_2_P1(__n) (HQM_QSET0_IB_DRBL_2_P1 + (__n) \
* (HQM_QSET1_IB_DRBL_2_P1 - HQM_QSET0_IB_DRBL_2_P1))
#define HQM_QSET_RXQ_DRBL_P0(__n) \
(HQM_QSET0_RXQ_DRBL_P0 + (__n) * \
(HQM_QSET1_RXQ_DRBL_P0 - HQM_QSET0_RXQ_DRBL_P0))
#define HQM_QSET_TXQ_DRBL_P0(__n) \
(HQM_QSET0_TXQ_DRBL_P0 + (__n) * \
(HQM_QSET1_TXQ_DRBL_P0 - HQM_QSET0_TXQ_DRBL_P0))
#define HQM_QSET_IB_DRBL_1_P0(__n) \
(HQM_QSET0_IB_DRBL_1_P0 + (__n) * \
(HQM_QSET1_IB_DRBL_1_P0 - HQM_QSET0_IB_DRBL_1_P0))
#define HQM_QSET_IB_DRBL_2_P0(__n) \
(HQM_QSET0_IB_DRBL_2_P0 + (__n) * \
(HQM_QSET1_IB_DRBL_2_P0 - HQM_QSET0_IB_DRBL_2_P0))
#define HQM_QSET_RXQ_DRBL_P1(__n) \
(HQM_QSET0_RXQ_DRBL_P1 + (__n) * \
(HQM_QSET1_RXQ_DRBL_P1 - HQM_QSET0_RXQ_DRBL_P1))
#define HQM_QSET_TXQ_DRBL_P1(__n) \
(HQM_QSET0_TXQ_DRBL_P1 + (__n) * \
(HQM_QSET1_TXQ_DRBL_P1 - HQM_QSET0_TXQ_DRBL_P1))
#define HQM_QSET_IB_DRBL_1_P1(__n) \
(HQM_QSET0_IB_DRBL_1_P1 + (__n) * \
(HQM_QSET1_IB_DRBL_1_P1 - HQM_QSET0_IB_DRBL_1_P1))
#define HQM_QSET_IB_DRBL_2_P1(__n) \
(HQM_QSET0_IB_DRBL_2_P1 + (__n) * \
(HQM_QSET1_IB_DRBL_2_P1 - HQM_QSET0_IB_DRBL_2_P1))
#define CPE_Q_NUM(__fn, __q) (((__fn) << 2) + (__q))
#define RME_Q_NUM(__fn, __q) (((__fn) << 2) + (__q))

View file

@ -32,8 +32,6 @@ extern const u32 bna_napi_dim_vector[][BNA_BIAS_T_MAX];
/* Log string size */
#define BNA_MESSAGE_SIZE 256
#define bna_device_timer(_dev) bfa_timer_beat(&((_dev)->timer_mod))
/* MBOX API for PORT, TX, RX */
#define bna_mbox_qe_fill(_qe, _cmd, _cmd_len, _cbfn, _cbarg) \
do { \

View file

@ -1425,13 +1425,24 @@ bnad_ioc_hb_check(unsigned long data)
}
static void
bnad_ioc_sem_timeout(unsigned long data)
bnad_iocpf_timeout(unsigned long data)
{
struct bnad *bnad = (struct bnad *)data;
unsigned long flags;
spin_lock_irqsave(&bnad->bna_lock, flags);
bfa_nw_ioc_sem_timeout((void *) &bnad->bna.device.ioc);
bfa_nw_iocpf_timeout((void *) &bnad->bna.device.ioc);
spin_unlock_irqrestore(&bnad->bna_lock, flags);
}
static void
bnad_iocpf_sem_timeout(unsigned long data)
{
struct bnad *bnad = (struct bnad *)data;
unsigned long flags;
spin_lock_irqsave(&bnad->bna_lock, flags);
bfa_nw_iocpf_sem_timeout((void *) &bnad->bna.device.ioc);
spin_unlock_irqrestore(&bnad->bna_lock, flags);
}
@ -3132,11 +3143,13 @@ bnad_pci_probe(struct pci_dev *pdev,
((unsigned long)bnad));
setup_timer(&bnad->bna.device.ioc.hb_timer, bnad_ioc_hb_check,
((unsigned long)bnad));
setup_timer(&bnad->bna.device.ioc.sem_timer, bnad_ioc_sem_timeout,
setup_timer(&bnad->bna.device.ioc.iocpf_timer, bnad_iocpf_timeout,
((unsigned long)bnad));
setup_timer(&bnad->bna.device.ioc.sem_timer, bnad_iocpf_sem_timeout,
((unsigned long)bnad));
/* Now start the timer before calling IOC */
mod_timer(&bnad->bna.device.ioc.ioc_timer,
mod_timer(&bnad->bna.device.ioc.iocpf_timer,
jiffies + msecs_to_jiffies(BNA_IOC_TIMER_FREQ));
/*