diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 80d2ecb045df..701d27471858 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -34,3 +34,64 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) set_bit(PDSC_S_FW_DEAD, &pdsc->state); } + +static void pdsc_fw_down(struct pdsc *pdsc) +{ + if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) { + dev_err(pdsc->dev, "%s: already happening\n", __func__); + return; + } + + pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY); +} + +static void pdsc_fw_up(struct pdsc *pdsc) +{ + int err; + + if (!test_bit(PDSC_S_FW_DEAD, &pdsc->state)) { + dev_err(pdsc->dev, "%s: fw not dead\n", __func__); + return; + } + + err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY); + if (err) + goto err_out; + + return; + +err_out: + pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY); +} + +void pdsc_health_thread(struct work_struct *work) +{ + struct pdsc *pdsc = container_of(work, struct pdsc, health_work); + unsigned long mask; + bool healthy; + + mutex_lock(&pdsc->config_lock); + + /* Don't do a check when in a transition state */ + mask = BIT_ULL(PDSC_S_INITING_DRIVER) | + BIT_ULL(PDSC_S_STOPPING_DRIVER); + if (pdsc->state & mask) + goto out_unlock; + + healthy = pdsc_is_fw_good(pdsc); + dev_dbg(pdsc->dev, "%s: health %d fw_status %#02x fw_heartbeat %d\n", + __func__, healthy, pdsc->fw_status, pdsc->last_hb); + + if (test_bit(PDSC_S_FW_DEAD, &pdsc->state)) { + if (healthy) + pdsc_fw_up(pdsc); + } else { + if (!healthy) + pdsc_fw_down(pdsc); + } + + pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION; + +out_unlock: + mutex_unlock(&pdsc->config_lock); +} diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index fcf6c6545c49..83c528a2a131 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -12,6 +12,8 @@ #include #define PDSC_DRV_DESCRIPTION "AMD/Pensando Core Driver" + +#define PDSC_WATCHDOG_SECS 5 #define PDSC_TEARDOWN_RECOVERY false #define PDSC_TEARDOWN_REMOVING true #define PDSC_SETUP_RECOVERY false @@ -63,12 +65,17 @@ struct pdsc { u8 fw_generation; unsigned long last_fw_time; u32 last_hb; + struct timer_list wdtimer; + unsigned int wdtimer_period; + struct work_struct health_work; struct pdsc_devinfo dev_info; struct pds_core_dev_identity dev_ident; unsigned int nintrs; struct pdsc_intr_info *intr_info; /* array of nintrs elements */ + struct workqueue_struct *wq; + unsigned int devcmd_timeout; struct mutex devcmd_lock; /* lock for dev_cmd operations */ struct mutex config_lock; /* lock for configuration operations */ @@ -102,5 +109,6 @@ int pdsc_dev_init(struct pdsc *pdsc); int pdsc_setup(struct pdsc *pdsc, bool init); void pdsc_teardown(struct pdsc *pdsc, bool removing); +void pdsc_health_thread(struct work_struct *work); #endif /* _PDSC_H_ */ diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index f082d69c5128..f7c597ea5daf 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -177,6 +177,9 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, err = pdsc_devcmd_wait(pdsc, max_seconds); memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); + if (err == -ENXIO || err == -ETIMEDOUT) + queue_work(pdsc->wq, &pdsc->health_work); + return err; } diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 09afb069dcb3..c9fbf1d374a7 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -20,6 +20,17 @@ static const struct pci_device_id pdsc_id_table[] = { }; MODULE_DEVICE_TABLE(pci, pdsc_id_table); +static void pdsc_wdtimer_cb(struct timer_list *t) +{ + struct pdsc *pdsc = from_timer(pdsc, t, wdtimer); + + dev_dbg(pdsc->dev, "%s: jiffies %ld\n", __func__, jiffies); + mod_timer(&pdsc->wdtimer, + round_jiffies(jiffies + pdsc->wdtimer_period)); + + queue_work(pdsc->wq, &pdsc->health_work); +} + static void pdsc_unmap_bars(struct pdsc *pdsc) { struct pdsc_dev_bar *bars = pdsc->bars; @@ -119,8 +130,11 @@ static int pdsc_init_vf(struct pdsc *vf) return -1; } +#define PDSC_WQ_NAME_LEN 24 + static int pdsc_init_pf(struct pdsc *pdsc) { + char wq_name[PDSC_WQ_NAME_LEN]; struct devlink *dl; int err; @@ -137,6 +151,13 @@ static int pdsc_init_pf(struct pdsc *pdsc) if (err) goto err_out_release_regions; + /* General workqueue and timer, but don't start timer yet */ + snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid); + pdsc->wq = create_singlethread_workqueue(wq_name); + INIT_WORK(&pdsc->health_work, pdsc_health_thread); + timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0); + pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ; + mutex_init(&pdsc->devcmd_lock); mutex_init(&pdsc->config_lock); @@ -154,10 +175,16 @@ static int pdsc_init_pf(struct pdsc *pdsc) devl_register(dl); devl_unlock(dl); + /* Lastly, start the health check timer */ + mod_timer(&pdsc->wdtimer, round_jiffies(jiffies + pdsc->wdtimer_period)); + return 0; err_out_unmap_bars: mutex_unlock(&pdsc->config_lock); + del_timer_sync(&pdsc->wdtimer); + if (pdsc->wq) + destroy_workqueue(pdsc->wq); mutex_destroy(&pdsc->config_lock); mutex_destroy(&pdsc->devcmd_lock); pci_free_irq_vectors(pdsc->pdev); @@ -259,6 +286,10 @@ static void pdsc_remove(struct pci_dev *pdev) devl_unlock(dl); if (!pdev->is_virtfn) { + del_timer_sync(&pdsc->wdtimer); + if (pdsc->wq) + destroy_workqueue(pdsc->wq); + mutex_lock(&pdsc->config_lock); set_bit(PDSC_S_STOPPING_DRIVER, &pdsc->state);