habanalabs: handle events during soft-reset

The driver should handle events during soft-reset, because the F/W does
not go through reset and keeps sending events towards the host.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ofir Bitton 2021-11-21 16:02:32 +02:00 committed by Oded Gabbay
parent b13bef2041
commit 3416d4b59b
3 changed files with 7 additions and 1 deletion

View file

@ -1019,6 +1019,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
handle_reset_trigger(hdev, flags);
hdev->is_in_soft_reset = !hard_reset;
/* This also blocks future CS/VM/JOB completion operations */
hdev->disabled = true;
@ -1171,6 +1173,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
* is required for the initialization itself
*/
hdev->disabled = false;
hdev->is_in_soft_reset = false;
rc = hdev->asic_funcs->hw_init(hdev);
if (rc) {
@ -1242,6 +1245,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
out_err:
hdev->disabled = true;
hdev->is_in_soft_reset = false;
if (hard_reset) {
dev_err(hdev->dev, "Failed to reset! Device is NOT usable\n");

View file

@ -2591,6 +2591,7 @@ struct last_error_session_info {
* protocol will throw an error. Relevant only for
* cases where Linux was not loaded to device CPU
* @supports_wait_for_multi_cs: true if wait for multi CS is supported
* @is_in_soft_reset: true if the device is currently in the soft-reset process.
*/
struct hl_device {
struct pci_dev *pdev;
@ -2719,6 +2720,7 @@ struct hl_device {
u8 device_cpu_is_halted;
u8 supports_wait_for_multi_cs;
u8 stream_master_qid_arr_size;
u8 is_in_soft_reset;
/* Parameters for bring-up */
u64 nic_ports_mask;

View file

@ -245,7 +245,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
*/
dma_rmb();
if (hdev->disabled) {
if (hdev->disabled && !hdev->is_in_soft_reset) {
dev_warn(hdev->dev, "Device disabled but received an EQ event\n");
goto skip_irq;
}