From 5d89ce6f8c2775b926b63c14529c5970429ea935 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Tue, 9 May 2023 13:51:59 +0300
Subject: [PATCH 01/77] accel/habanalabs: prevent immediate hard reset due to 2
 adjacent H/W events

When a H/W event is received while a user is registered to events, no
immediate hard reset will happen, and instead the user will be notified
and will have some time to handle it and eventually release the
device, after which the reset will be done.
If a user, as part of the handling and as part of the cleanup steps
towards releasing the device, unregisters from receiving those events,
and at that time an adjacent H/W event is received, it will be assumed
that the user is not registered to events and thus an immediate hard
reset is required.

To prevent such an unwanted immediate reset, modify the driver to
perform it if the user is not registered to events AND we don't already
have a pending reset for a previous H/W event.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index b97339d1f7c6..1e61e79c42e5 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1916,7 +1916,16 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
 	}
 
 	ctx = hl_get_compute_ctx(hdev);
-	if (!ctx || !ctx->hpriv->notifier_event.eventfd)
+	if (!ctx)
+		goto device_reset;
+
+	/*
+	 * There is no point in postponing the reset if user is not registered for events.
+	 * However if no eventfd_ctx exists but the device release watchdog is already scheduled, it
+	 * just implies that user has unregistered as part of handling a previous event. In this
+	 * case an immediate reset is not required.
+	 */
+	if (!ctx->hpriv->notifier_event.eventfd && !hdev->reset_info.watchdog_active)
 		goto device_reset;
 
 	/* Schedule the device release watchdog work unless reset is already in progress or if the

From a35c99760146137bbcfafe82ed5dc9c7a00b640a Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 10 May 2023 18:18:05 +0300
Subject: [PATCH 02/77] accel/habanalabs: update pending reset flags with new
 reset requests

If hl_device_cond_reset() is called while a reset is already pending but
hasn't started, the reset request will be dropped.
If the flags of the new request are more severe, e.g. a hard reset while
the pending reset is a compute reset, the eventual reset won't be
suitable for the device status.

To prevent such cases, update the pending reset flags with the new
requests flags before the requests are dropped.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 1e61e79c42e5..993305871292 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1937,8 +1937,10 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
 		goto device_reset;
 	}
 
-	if (hdev->reset_info.watchdog_active)
+	if (hdev->reset_info.watchdog_active) {
+		hdev->device_release_watchdog_work.flags |= flags;
 		goto out;
+	}
 
 	hdev->device_release_watchdog_work.flags = flags;
 	dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",

From c6a4f256aee17e18dbc14039d746e0450e4b199b Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Mon, 15 May 2023 13:56:25 +0300
Subject: [PATCH 03/77] accel/habanalabs: notify user about undefined opcode
 event

In order for user to be aware of undefined opcode events, we must
store all relevant information and notify user about the failure.
The user will fetch the stored info via info ioctl.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 142 ++++++++++++++++++++++-
 1 file changed, 137 insertions(+), 5 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 20c4583f12b0..ed3b0b6225d2 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -993,6 +993,111 @@ gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
 	"TLP is blocked by RR"
 };
 
+static const int gaudi2_queue_id_to_engine_id[] = {
+	[GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0,
+	[GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1,
+	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] =
+							GAUDI2_DCORE0_ENGINE_ID_EDMA_0,
+	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] =
+							GAUDI2_DCORE0_ENGINE_ID_EDMA_1,
+	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] =
+							GAUDI2_DCORE1_ENGINE_ID_EDMA_0,
+	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] =
+							GAUDI2_DCORE1_ENGINE_ID_EDMA_1,
+	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] =
+							GAUDI2_DCORE2_ENGINE_ID_EDMA_0,
+	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] =
+							GAUDI2_DCORE2_ENGINE_ID_EDMA_1,
+	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] =
+							GAUDI2_DCORE3_ENGINE_ID_EDMA_0,
+	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] =
+							GAUDI2_DCORE3_ENGINE_ID_EDMA_1,
+	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] =
+							GAUDI2_DCORE0_ENGINE_ID_MME,
+	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] =
+							GAUDI2_DCORE1_ENGINE_ID_MME,
+	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] =
+							GAUDI2_DCORE2_ENGINE_ID_MME,
+	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] =
+							GAUDI2_DCORE3_ENGINE_ID_MME,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_0,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_1,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_2,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_3,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_4,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_5,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_6,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_0,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_1,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_2,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_3,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_4,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_5,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_0,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_1,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_2,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_3,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_4,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_5,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_0,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_1,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_2,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_3,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_4,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_5,
+	[GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0,
+	[GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1,
+	[GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0,
+	[GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1,
+	[GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0,
+	[GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1,
+	[GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0,
+	[GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1,
+	[GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0,
+	[GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1,
+	[GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0,
+	[GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1,
+	[GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0,
+	[GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1,
+	[GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0,
+	[GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1,
+	[GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0,
+	[GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1,
+	[GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0,
+	[GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1,
+	[GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0,
+	[GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1,
+	[GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0,
+	[GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1,
+	[GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0,
+	[GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1,
+};
+
 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
 	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
 	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
@@ -7753,7 +7858,7 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 	return !!ecc_data->is_critical;
 }
 
-static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
+static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
 {
 	u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
 	u64 cq_ptr, arc_cq_ptr, cp_current_inst;
@@ -7775,10 +7880,22 @@ static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
 	dev_info(hdev->dev,
 		"LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
 		cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
+
+	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
+		if (arc_cq_ptr) {
+			hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr;
+			hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size;
+		} else {
+			hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
+			hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
+		}
+
+		hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
+	}
 }
 
 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
-							u64 qman_base, u32 qid_base)
+						u64 qman_base, u32 qid_base, u64 *event_mask)
 {
 	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
 	u64 glbl_sts_addr, arb_err_addr;
@@ -7812,8 +7929,22 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
 				error_count++;
 			}
 
-		if (i == QMAN_STREAMS)
-			print_lower_qman_data_on_err(hdev, qman_base);
+		if (i == QMAN_STREAMS && error_count) {
+			/* check for undefined opcode */
+			if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK &&
+					hdev->captured_err_info.undef_opcode.write_enable) {
+				memset(&hdev->captured_err_info.undef_opcode, 0,
+						sizeof(hdev->captured_err_info.undef_opcode));
+
+				hdev->captured_err_info.undef_opcode.write_enable = false;
+				hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
+				hdev->captured_err_info.undef_opcode.engine_id =
+							gaudi2_queue_id_to_engine_id[qid_base];
+				*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
+			}
+
+			handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
+		}
 	}
 
 	arb_err_val = RREG32(arb_err_addr);
@@ -8475,7 +8606,8 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
 		return 0;
 	}
 
-	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
+	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base,
+								qid_base, event_mask);
 
 	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
 	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {

From 7dccb064a7ab54068b5a92466361dcd0db069c73 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Tue, 16 May 2023 11:10:52 +0300
Subject: [PATCH 04/77] accel/habanalabs: print task name and request code upon
 ioctl failure

When an ioctl fails, it is useful to know what is the task command name
and the full ioctl request code, in addition to the task pid and the
ioctl number.
Add the additional information to the relevant debug error prints.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/habanalabs_ioctl.c      | 24 +++++++++++++------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 6a45a92344e9..549b2518fae0 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -1194,9 +1194,13 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
 		retcode = -EFAULT;
 
 out_err:
-	if (retcode)
-		dev_dbg_ratelimited(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
-			  task_pid_nr(current), cmd, nr);
+	if (retcode) {
+		char task_comm[TASK_COMM_LEN];
+
+		dev_dbg_ratelimited(dev,
+				"error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
+				task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
+	}
 
 	if (kdata != stack_kdata)
 		kfree(kdata);
@@ -1219,8 +1223,11 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 	if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
 		ioctl = &hl_ioctls[nr];
 	} else {
-		dev_dbg_ratelimited(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
-			task_pid_nr(current), nr);
+		char task_comm[TASK_COMM_LEN];
+
+		dev_dbg_ratelimited(hdev->dev,
+				"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
+				task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
 		return -ENOTTY;
 	}
 
@@ -1242,8 +1249,11 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
 	if (nr == _IOC_NR(HL_IOCTL_INFO)) {
 		ioctl = &hl_ioctls_control[nr];
 	} else {
-		dev_dbg_ratelimited(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n",
-			task_pid_nr(current), nr);
+		char task_comm[TASK_COMM_LEN];
+
+		dev_dbg_ratelimited(hdev->dev_ctrl,
+				"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
+				task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
 		return -ENOTTY;
 	}
 

From e7b2902a330eba354024581edead0002521bcedf Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 22 May 2023 11:20:48 +0300
Subject: [PATCH 05/77] accel/habanalabs: print task name upon creation of a
 user context

It is useful for debug to know which user process have acquired the
device.
Add this info to the relevant debug print, in addition to the already
printed user context's ASID.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/context.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/habanalabs/common/context.c b/drivers/accel/habanalabs/common/context.c
index 9c8b1b37b510..0a53f7154739 100644
--- a/drivers/accel/habanalabs/common/context.c
+++ b/drivers/accel/habanalabs/common/context.c
@@ -102,7 +102,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 	kfree(ctx->cs_pending);
 
 	if (ctx->asid != HL_KERNEL_ASID_ID) {
-		dev_dbg(hdev->dev, "closing user context %d\n", ctx->asid);
+		dev_dbg(hdev->dev, "closing user context, asid=%u\n", ctx->asid);
 
 		/* The engines are stopped as there is no executing CS, but the
 		 * Coresight might be still working by accessing addresses
@@ -198,6 +198,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
 
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 {
+	char task_comm[TASK_COMM_LEN];
 	int rc = 0, i;
 
 	ctx->hdev = hdev;
@@ -267,7 +268,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 
 		hl_encaps_sig_mgr_init(&ctx->sig_mgr);
 
-		dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
+		dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
+			get_task_comm(task_comm, current), ctx->asid);
 	}
 
 	return 0;

From e4a97d6b62599cc6c4bd37674de378b2a86225c3 Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Mon, 29 May 2023 11:41:04 +0300
Subject: [PATCH 06/77] accel/habanalabs: set device status 'malfunction' while
 in rmmod

hl_device_status() returns the status of an acquired device.
If a device is going down (following an rmmod cmd),
it should be marked as an unusable/malfunctioning device, and
hence should not be acquired.
However, since this was not the case so far (i.e., a device going
down would inaccurately return 'in reset' status allowing the user
to acquire the device) it introduced a bug where as part of a reset
flow, the driver could not kill processes that have not run yet, and
since those processes aren't blocked from reacquiring a device,
we get eventually a new flow of a driver attempting to kill all
processes in a list that can't be ever really empty.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 993305871292..5973e4d64e19 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -315,7 +315,9 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
 	enum hl_device_status status;
 
-	if (hdev->reset_info.in_reset) {
+	if (hdev->device_fini_pending) {
+		status = HL_DEVICE_STATUS_MALFUNCTION;
+	} else if (hdev->reset_info.in_reset) {
 		if (hdev->reset_info.in_compute_reset)
 			status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
 		else
@@ -343,9 +345,9 @@ bool hl_device_operational(struct hl_device *hdev,
 		*status = current_status;
 
 	switch (current_status) {
+	case HL_DEVICE_STATUS_MALFUNCTION:
 	case HL_DEVICE_STATUS_IN_RESET:
 	case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
-	case HL_DEVICE_STATUS_MALFUNCTION:
 	case HL_DEVICE_STATUS_NEEDS_RESET:
 		return false;
 	case HL_DEVICE_STATUS_OPERATIONAL:

From f17182d03680044649d6e124a4580a33c65990e2 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Wed, 31 May 2023 12:40:41 +0300
Subject: [PATCH 07/77] accel/habanalabs: stop fetching MME SBTE error cause

Because in this case we have only a single possible cause, we can
safely stop fetching the cause from firmware.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 31 ++++++------------------
 1 file changed, 8 insertions(+), 23 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index ed3b0b6225d2..899b1c4b53f6 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -66,7 +66,6 @@
 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE		31
 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
 #define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
-#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
@@ -916,14 +915,6 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] =
 	"sbte_prtn_intr_4",
 };
 
-static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
-	"i0",
-	"i1",
-	"i2",
-	"i3",
-	"i4",
-};
-
 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
 	"WBC ERR RESP_0",
 	"WBC ERR RESP_1",
@@ -8781,21 +8772,16 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event
 	return error_count;
 }
 
-static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
-					u64 intr_cause_data)
+static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
 {
-	int i, error_count = 0;
-
-	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
-		if (intr_cause_data & BIT(i)) {
-			gaudi2_print_event(hdev, event_type, true,
-				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
-			error_count++;
-		}
-
+	/*
+	 * We have a single error cause here but the report mechanism is
+	 * buggy. Hence there is no good reason to fetch the cause so we
+	 * just check for glbl_errors and exit.
+	 */
 	hl_check_for_glbl_errors(hdev);
 
-	return error_count;
+	return GAUDI2_NA_EVENT_CAUSE;
 }
 
 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
@@ -9856,8 +9842,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
-		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
-						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:

From 43d8acce607c90a44653c80f3bf211f31abc0416 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Wed, 29 Mar 2023 20:21:56 +0300
Subject: [PATCH 08/77] accel/habanalabs: handle arc farm razwi

Implement razwi handling for arc farm and add it to arc farm sei
event handler.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 899b1c4b53f6..1085215ac51d 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -2097,7 +2097,8 @@ enum razwi_event_sources {
 	RAZWI_PDMA,
 	RAZWI_NIC,
 	RAZWI_DEC,
-	RAZWI_ROT
+	RAZWI_ROT,
+	RAZWI_ARC_FARM
 };
 
 struct hbm_mc_error_causes {
@@ -8049,6 +8050,9 @@ static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
 	case RAZWI_ROT:
 		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
 
+	case RAZWI_ARC_FARM:
+		return GAUDI2_ENGINE_ID_ARC_FARM;
+
 	default:
 		return GAUDI2_ENGINE_ID_SIZE;
 	}
@@ -8158,6 +8162,11 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
 		lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
 		sprintf(initiator_name, "ROT_%u", module_idx);
 		break;
+	case RAZWI_ARC_FARM:
+		lbw_rtr_id = DCORE1_RTR5;
+		hbw_rtr_id = DCORE1_RTR7;
+		sprintf(initiator_name, "ARC_FARM_%u", module_idx);
+		break;
 	default:
 		return;
 	}
@@ -8611,7 +8620,7 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
 	return error_count;
 }
 
-static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
+static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
 {
 	u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
 
@@ -8633,6 +8642,7 @@ static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type
 				sts_clr_val);
 	}
 
+	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask);
 	hl_check_for_glbl_errors(hdev);
 
 	return error_count;
@@ -9619,7 +9629,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		break;
 
 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
-		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
+		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 

From bffd2f16ae374decdeb5bc714b1aa2e0c23f3708 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Sun, 4 Jun 2023 11:19:19 +0300
Subject: [PATCH 09/77] accel/habanalabs: fix standalone preboot descriptor
 request

The preboot used to statically allocate memory for the comms descriptor
on the device memory when driver requested the descriptor information.
Now preboot moved to dynamic memory allocation where it wants to check
the size the driver expects vs. what the f/w expects.
Note there are no backward compatibility issues as older f/w versions
simply ignore this value.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/firmware_if.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index acbc1a6b5cb1..370508e98854 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -2743,7 +2743,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 	if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
 		struct lkd_fw_binning_info *binning_info;
 
-		rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, 0);
+		rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
+							sizeof(struct lkd_msg_comms));
 		if (rc)
 			goto protocol_err;
 

From 2b541cf91373b1bc1634a82b668022d50c05603c Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Mon, 5 Jun 2023 11:11:05 +0300
Subject: [PATCH 10/77] accel/habanalabs: print return code when process
 termination fails

As part of driver teardown, we attempt to kill all user processes.
It shouldn't fail, but if it does we want to print the error code that
the kapi returned to us.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 5973e4d64e19..764d40c0d666 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1352,7 +1352,8 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
 			 * we don't need to kill it.
 			 */
 			dev_dbg(hdev->dev,
-				"Can't get task struct for user process, assuming process was killed from outside the driver\n");
+				"Can't get task struct for user process %d, assuming process was killed from outside the driver\n",
+				pid_nr(hpriv->taskpid));
 		}
 	}
 
@@ -2438,14 +2439,14 @@ void hl_device_fini(struct hl_device *hdev)
 	hdev->process_kill_trial_cnt = 0;
 	rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false);
 	if (rc) {
-		dev_crit(hdev->dev, "Failed to kill all open processes\n");
+		dev_crit(hdev->dev, "Failed to kill all open processes (%d)\n", rc);
 		device_disable_open_processes(hdev, false);
 	}
 
 	hdev->process_kill_trial_cnt = 0;
 	rc = device_kill_open_processes(hdev, 0, true);
 	if (rc) {
-		dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
+		dev_crit(hdev->dev, "Failed to kill all control device open processes (%d)\n", rc);
 		device_disable_open_processes(hdev, true);
 	}
 

From 942f18c56d2106115bcf344cc7b0f762c6fc55c2 Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Tue, 6 Jun 2023 11:57:51 +0300
Subject: [PATCH 11/77] accel/habanalabs: call put_pid after hpriv list is
 updated

Because we might still be using related resources, decrementing PID's
reference count should be done at later stages of the device release.
A good place is right after the representing private structure is
removed from LKD's list.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 764d40c0d666..c61a58a2e622 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -408,8 +408,6 @@ static void hpriv_release(struct kref *ref)
 
 	hdev->asic_funcs->send_device_activity(hdev, false);
 
-	put_pid(hpriv->taskpid);
-
 	hl_debugfs_remove_file(hpriv);
 
 	mutex_destroy(&hpriv->ctx_lock);
@@ -448,6 +446,8 @@ static void hpriv_release(struct kref *ref)
 	list_del(&hpriv->dev_node);
 	mutex_unlock(&hdev->fpriv_list_lock);
 
+	put_pid(hpriv->taskpid);
+
 	if (reset_device) {
 		hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
 	} else {

From 964b1f675dac62763feaa6cec188be9f92aa9e57 Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Tue, 6 Jun 2023 14:44:57 +0300
Subject: [PATCH 12/77] accel/habanalabs: rename fd_list to hpriv_list

Every time an FD is returned to the user, the driver adds
a corresponding private structure to the list.
Yet, it's still a list of private structures rather than of FDs.
Remove, as well, an unnecessary comment.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 43 +++++++++++-------------
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index c61a58a2e622..0d02f1f7b994 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1304,18 +1304,18 @@ int hl_device_resume(struct hl_device *hdev)
 static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
 {
 	struct task_struct *task = NULL;
-	struct list_head *fd_list;
-	struct hl_fpriv	*hpriv;
-	struct mutex *fd_lock;
+	struct list_head *hpriv_list;
+	struct hl_fpriv *hpriv;
+	struct mutex *hpriv_lock;
 	u32 pending_cnt;
 
-	fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
-	fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
+	hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
+	hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
 
 	/* Giving time for user to close FD, and for processes that are inside
 	 * hl_device_open to finish
 	 */
-	if (!list_empty(fd_list))
+	if (!list_empty(hpriv_list))
 		ssleep(1);
 
 	if (timeout) {
@@ -1331,12 +1331,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
 		}
 	}
 
-	mutex_lock(fd_lock);
+	mutex_lock(hpriv_lock);
 
 	/* This section must be protected because we are dereferencing
 	 * pointers that are freed if the process exits
 	 */
-	list_for_each_entry(hpriv, fd_list, dev_node) {
+	list_for_each_entry(hpriv, hpriv_list, dev_node) {
 		task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
 		if (task) {
 			dev_info(hdev->dev, "Killing user process pid=%d\n",
@@ -1346,18 +1346,13 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
 
 			put_task_struct(task);
 		} else {
-			/*
-			 * If we got here, it means that process was killed from outside the driver
-			 * right after it started looping on fd_list and before get_pid_task, thus
-			 * we don't need to kill it.
-			 */
 			dev_dbg(hdev->dev,
-				"Can't get task struct for user process %d, assuming process was killed from outside the driver\n",
+				"Can't get task struct for user process %d, process was killed from outside the driver\n",
 				pid_nr(hpriv->taskpid));
 		}
 	}
 
-	mutex_unlock(fd_lock);
+	mutex_unlock(hpriv_lock);
 
 	/*
 	 * We killed the open users, but that doesn't mean they are closed.
@@ -1369,7 +1364,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
 	 */
 
 wait_for_processes:
-	while ((!list_empty(fd_list)) && (pending_cnt)) {
+	while ((!list_empty(hpriv_list)) && (pending_cnt)) {
 		dev_dbg(hdev->dev,
 			"Waiting for all unmap operations to finish before hard reset\n");
 
@@ -1379,7 +1374,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
 	}
 
 	/* All processes exited successfully */
-	if (list_empty(fd_list))
+	if (list_empty(hpriv_list))
 		return 0;
 
 	/* Give up waiting for processes to exit */
@@ -1393,17 +1388,17 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
 
 static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
 {
-	struct list_head *fd_list;
+	struct list_head *hpriv_list;
 	struct hl_fpriv *hpriv;
-	struct mutex *fd_lock;
+	struct mutex *hpriv_lock;
 
-	fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
-	fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
+	hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
+	hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
 
-	mutex_lock(fd_lock);
-	list_for_each_entry(hpriv, fd_list, dev_node)
+	mutex_lock(hpriv_lock);
+	list_for_each_entry(hpriv, hpriv_list, dev_node)
 		hpriv->hdev = NULL;
-	mutex_unlock(fd_lock);
+	mutex_unlock(hpriv_lock);
 }
 
 static void send_disable_pci_access(struct hl_device *hdev, u32 flags)

From eaa43a06b765542ee36f38bdd92b04bdc8645964 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Mon, 12 Jun 2023 13:47:49 +0300
Subject: [PATCH 13/77] accel/habanalabs: Allow single timestamp registration
 request at a time

Protect against concurrency of user requesting to register a timestamp
offset (where the driver fills the timestamp when the command submission
has finished executing) to a specific user interrupt ID. The
protection is basically to allow only one timestamp registration
request to be handled at a time.

This is needed because the user can decide to re-use a timestamp
offset (register an already registered offset, to a different
interrupt ID). This means the request will cause the timestamp node to
move from one interrupt list to another interrupt list. In such
scenario, without proper protection, we could end up adding the same
node twice to the interrupts wait lists.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 336 +++++++++++-------
 drivers/accel/habanalabs/common/context.c     |   3 +
 drivers/accel/habanalabs/common/habanalabs.h  |  12 +-
 drivers/accel/habanalabs/common/irq.c         |  17 +-
 4 files changed, 228 insertions(+), 140 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index c23829dab97a..02ac6d754fba 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -31,6 +31,25 @@ enum hl_cs_wait_status {
 	CS_WAIT_STATUS_GONE
 };
 
+/*
+ * Data used while handling wait/timestamp nodes.
+ * The purpose of this struct is to store the needed data for both operations
+ * in one variable instead of passing large number of arguments to functions.
+ */
+struct wait_interrupt_data {
+	struct hl_user_interrupt *interrupt;
+	struct hl_mmap_mem_buf *buf;
+	struct hl_mem_mgr *mmg;
+	struct hl_cb *cq_cb;
+	u64 ts_handle;
+	u64 ts_offset;
+	u64 cq_handle;
+	u64 cq_offset;
+	u64 target_value;
+	u64 intr_timeout_us;
+	unsigned long flags;
+};
+
 static void job_wq_completion(struct work_struct *work);
 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
 				enum hl_cs_wait_status *status, s64 *timestamp);
@@ -3197,133 +3216,181 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	return 0;
 }
 
-static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
-					struct hl_cb *cq_cb,
-					u64 ts_offset, u64 cq_offset, u64 target_value,
-					spinlock_t *wait_list_lock,
-					struct hl_user_pending_interrupt **pend)
+static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
+					struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
 {
-	struct hl_ts_buff *ts_buff = buf->private;
-	struct hl_user_pending_interrupt *requested_offset_record =
-				(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
-				ts_offset;
-	struct hl_user_pending_interrupt *cb_last =
-			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+	record->ts_reg_info.cq_cb = cq_cb;
+	record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
+	record->cq_target_value = target_value;
+}
+
+static int validate_and_get_ts_record(struct device *dev,
+					struct hl_ts_buff *ts_buff, u64 ts_offset,
+					struct hl_user_pending_interrupt **req_event_record)
+{
+	struct hl_user_pending_interrupt *ts_cb_last;
+
+	*req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+						ts_offset;
+	ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
 			(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
-	unsigned long iter_counter = 0;
-	u64 current_cq_counter;
-	ktime_t timestamp;
 
 	/* Validate ts_offset not exceeding last max */
-	if (requested_offset_record >= cb_last) {
-		dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
-								(u64)(uintptr_t)cb_last);
+	if (*req_event_record >= ts_cb_last) {
+		dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n",
+				ts_offset, (u64)(uintptr_t)ts_cb_last);
 		return -EINVAL;
 	}
 
-	timestamp = ktime_get();
-
-start_over:
-	spin_lock(wait_list_lock);
-
-	/* Unregister only if we didn't reach the target value
-	 * since in this case there will be no handling in irq context
-	 * and then it's safe to delete the node out of the interrupt list
-	 * then re-use it on other interrupt
-	 */
-	if (requested_offset_record->ts_reg_info.in_use) {
-		current_cq_counter = *requested_offset_record->cq_kernel_addr;
-		if (current_cq_counter < requested_offset_record->cq_target_value) {
-			list_del(&requested_offset_record->wait_list_node);
-			spin_unlock(wait_list_lock);
-
-			hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
-			hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
-
-			dev_dbg(buf->mmg->dev,
-				"ts node removed from interrupt list now can re-use\n");
-		} else {
-			dev_dbg(buf->mmg->dev,
-				"ts node in middle of irq handling\n");
-
-			/* irq thread handling in the middle give it time to finish */
-			spin_unlock(wait_list_lock);
-			usleep_range(100, 1000);
-			if (++iter_counter == MAX_TS_ITER_NUM) {
-				dev_err(buf->mmg->dev,
-					"Timestamp offset processing reached timeout of %lld ms\n",
-					ktime_ms_delta(ktime_get(), timestamp));
-				return -EAGAIN;
-			}
-
-			goto start_over;
-		}
-	} else {
-		/* Fill up the new registration node info */
-		requested_offset_record->ts_reg_info.buf = buf;
-		requested_offset_record->ts_reg_info.cq_cb = cq_cb;
-		requested_offset_record->ts_reg_info.timestamp_kernel_addr =
-				(u64 *) ts_buff->user_buff_address + ts_offset;
-		requested_offset_record->cq_kernel_addr =
-				(u64 *) cq_cb->kernel_address + cq_offset;
-		requested_offset_record->cq_target_value = target_value;
-
-		spin_unlock(wait_list_lock);
-	}
-
-	*pend = requested_offset_record;
-
-	dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n",
-		requested_offset_record);
 	return 0;
 }
 
+static int unregister_timestamp_node(struct hl_device *hdev, struct hl_ctx *ctx,
+			struct hl_mem_mgr *mmg, u64 ts_handle, u64 ts_offset,
+			struct hl_user_interrupt *interrupt)
+{
+	struct hl_user_pending_interrupt *req_event_record, *pend, *temp_pend;
+	struct hl_mmap_mem_buf *buff;
+	struct hl_ts_buff *ts_buff;
+	bool ts_rec_found = false;
+	int rc;
+
+	buff = hl_mmap_mem_buf_get(mmg, ts_handle);
+	if (!buff) {
+		dev_err(hdev->dev, "invalid TS buff handle!\n");
+		return -EINVAL;
+	}
+
+	ts_buff = buff->private;
+
+	rc = validate_and_get_ts_record(hdev->dev, ts_buff, ts_offset, &req_event_record);
+	if (rc)
+		goto put_buf;
+
+	/*
+	 * Note: we don't use the ts in_use field here, but we rather scan the list
+	 * because we cannot rely on the user to keep the order of register/unregister calls
+	 * and since we might have races here all the time between the irq and register/unregister
+	 * calls so it safer to lock the list and scan it to find the node.
+	 * If the node found on the list we mark it as not in use and delete it from the list,
+	 * if it's not here then the node was handled already in the irq before we get into
+	 * this ioctl.
+	 */
+	spin_lock(&interrupt->wait_list_lock);
+
+	list_for_each_entry_safe(pend, temp_pend, &interrupt->wait_list_head, wait_list_node) {
+		if (pend == req_event_record) {
+			pend->ts_reg_info.in_use = false;
+			list_del(&pend->wait_list_node);
+			ts_rec_found = true;
+			break;
+		}
+	}
+
+	spin_unlock(&interrupt->wait_list_lock);
+
+	/* Put refcounts that were taken when we registered the event */
+	if (ts_rec_found) {
+		hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
+		hl_cb_put(pend->ts_reg_info.cq_cb);
+	}
+
+put_buf:
+	hl_mmap_mem_buf_put(buff);
+
+	return rc;
+}
+
+static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
+					struct wait_interrupt_data *data,
+					struct hl_user_pending_interrupt **pend)
+{
+	struct hl_user_pending_interrupt *req_offset_record;
+	struct hl_ts_buff *ts_buff = data->buf->private;
+	int rc;
+
+	rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
+									&req_offset_record);
+	if (rc)
+		return rc;
+
+	/* In case the node already registered, need to unregister first then re-use*/
+	if (req_offset_record->ts_reg_info.in_use) {
+		dev_dbg(data->buf->mmg->dev,
+				"Requested ts offset(%llx) is in use, unregister first\n",
+							data->ts_offset);
+		/*
+		 * Since interrupt here can be different than the one the node currently registered
+		 * on, and we don't wan't to lock two lists while we're doing unregister, so
+		 * unlock the new interrupt wait list here and acquire the lock again after you done
+		 */
+		spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
+
+		unregister_timestamp_node(hdev, ctx, data->mmg, data->ts_handle,
+				data->ts_offset, req_offset_record->ts_reg_info.interrupt);
+		spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
+	}
+
+	/* Fill up the new registration node info and add it to the list */
+	req_offset_record->ts_reg_info.in_use = true;
+	req_offset_record->ts_reg_info.buf = data->buf;
+	req_offset_record->ts_reg_info.timestamp_kernel_addr =
+			(u64 *) ts_buff->user_buff_address + data->ts_offset;
+	req_offset_record->ts_reg_info.interrupt = data->interrupt;
+	set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
+						data->target_value);
+
+	*pend = req_offset_record;
+
+	return rc;
+}
+
 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
-				struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg,
-				u64 timeout_us, u64 cq_counters_handle,	u64 cq_counters_offset,
-				u64 target_value, struct hl_user_interrupt *interrupt,
-				bool register_ts_record, u64 ts_handle, u64 ts_offset,
+				struct wait_interrupt_data *data,
+				bool register_ts_record,
 				u32 *status, u64 *timestamp)
 {
 	struct hl_user_pending_interrupt *pend;
-	struct hl_mmap_mem_buf *buf;
-	struct hl_cb *cq_cb;
 	unsigned long timeout;
 	long completion_rc;
 	int rc = 0;
 
-	timeout = hl_usecs64_to_jiffies(timeout_us);
+	timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
 
 	hl_ctx_get(ctx);
 
-	cq_cb = hl_cb_get(cb_mmg, cq_counters_handle);
-	if (!cq_cb) {
+	data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
+	if (!data->cq_cb) {
 		rc = -EINVAL;
 		goto put_ctx;
 	}
 
 	/* Validate the cq offset */
-	if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >=
-			((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) {
+	if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
+			((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
 		rc = -EINVAL;
 		goto put_cq_cb;
 	}
 
 	if (register_ts_record) {
-		dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
-					interrupt->interrupt_id, ts_offset, cq_counters_offset);
-		buf = hl_mmap_mem_buf_get(mmg, ts_handle);
-		if (!buf) {
+		dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
+				data->interrupt->interrupt_id, data->ts_handle,
+				data->ts_offset, data->cq_offset);
+
+		data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
+		if (!data->buf) {
 			rc = -EINVAL;
 			goto put_cq_cb;
 		}
 
+		spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
+
 		/* get ts buffer record */
-		rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset,
-						cq_counters_offset, target_value,
-						&interrupt->wait_list_lock, &pend);
-		if (rc)
+		rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &pend);
+		if (rc) {
+			spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
 			goto put_ts_buff;
+		}
 	} else {
 		pend = kzalloc(sizeof(*pend), GFP_KERNEL);
 		if (!pend) {
@@ -3331,19 +3398,22 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 			goto put_cq_cb;
 		}
 		hl_fence_init(&pend->fence, ULONG_MAX);
-		pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
-		pend->cq_target_value = target_value;
+		pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
+		pend->cq_target_value = data->target_value;
+		spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
 	}
 
-	spin_lock(&interrupt->wait_list_lock);
-
 	/* We check for completion value as interrupt could have been received
-	 * before we added the node to the wait list
+	 * before we add the wait/timestamp node to the wait list.
 	 */
-	if (*pend->cq_kernel_addr >= target_value) {
-		if (register_ts_record)
-			pend->ts_reg_info.in_use = 0;
-		spin_unlock(&interrupt->wait_list_lock);
+	if (*pend->cq_kernel_addr >= data->target_value) {
+		spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
+
+		if (register_ts_record) {
+			dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
+					pend, data->ts_offset, data->interrupt->interrupt_id);
+			pend->ts_reg_info.in_use = false;
+		}
 
 		*status = HL_WAIT_CS_STATUS_COMPLETED;
 
@@ -3354,8 +3424,8 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 			pend->fence.timestamp = ktime_get();
 			goto set_timestamp;
 		}
-	} else if (!timeout_us) {
-		spin_unlock(&interrupt->wait_list_lock);
+	} else if (!data->intr_timeout_us) {
+		spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
 		*status = HL_WAIT_CS_STATUS_BUSY;
 		pend->fence.timestamp = ktime_get();
 		goto set_timestamp;
@@ -3366,21 +3436,9 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	 * Note that we cannot have sorted list by target value,
 	 * in order to shorten the list pass loop, since
 	 * same list could have nodes for different cq counter handle.
-	 * Note:
-	 * Mark ts buff offset as in use here in the spinlock protection area
-	 * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record
-	 * before adding the node to the list. this scenario might happen when
-	 * multiple threads are racing on same offset and one thread could
-	 * set the ts buff in ts_buff_get_kernel_ts_record then the other thread
-	 * takes over and get to ts_buff_get_kernel_ts_record and then we will try
-	 * to re-use the same ts buff offset, and will try to delete a non existing
-	 * node from the list.
 	 */
-	if (register_ts_record)
-		pend->ts_reg_info.in_use = 1;
-
-	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
-	spin_unlock(&interrupt->wait_list_lock);
+	list_add_tail(&pend->wait_list_node, &data->interrupt->wait_list_head);
+	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
 
 	if (register_ts_record) {
 		rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
@@ -3396,7 +3454,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 		if (completion_rc == -ERESTARTSYS) {
 			dev_err_ratelimited(hdev->dev,
 					"user process got signal while waiting for interrupt ID %d\n",
-					interrupt->interrupt_id);
+					data->interrupt->interrupt_id);
 			rc = -EINTR;
 			*status = HL_WAIT_CS_STATUS_ABORTED;
 		} else {
@@ -3424,23 +3482,23 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	 * for ts record, the node will be deleted in the irq handler after
 	 * we reach the target value.
 	 */
-	spin_lock(&interrupt->wait_list_lock);
+	spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
 	list_del(&pend->wait_list_node);
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
 
 set_timestamp:
 	*timestamp = ktime_to_ns(pend->fence.timestamp);
 	kfree(pend);
-	hl_cb_put(cq_cb);
+	hl_cb_put(data->cq_cb);
 ts_registration_exit:
 	hl_ctx_put(ctx);
 
 	return rc;
 
 put_ts_buff:
-	hl_mmap_mem_buf_put(buf);
+	hl_mmap_mem_buf_put(data->buf);
 put_cq_cb:
-	hl_cb_put(cq_cb);
+	hl_cb_put(data->cq_cb);
 put_ctx:
 	hl_ctx_put(ctx);
 
@@ -3611,19 +3669,41 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 		return -EINVAL;
 	}
 
-	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
-		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr,
-				args->in.interrupt_timeout_us, args->in.cq_counters_handle,
-				args->in.cq_counters_offset,
-				args->in.target, interrupt,
+	/*
+	 * Allow only one registration at a time. this is needed in order to prevent issues
+	 * while handling the flow of re-use of the same offset.
+	 * Since the registration flow is protected only by the interrupt lock, re-use flow
+	 * might request to move ts node to another interrupt list, and in such case we're
+	 * not protected.
+	 */
+	if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT)
+		mutex_lock(&hpriv->ctx->ts_reg_lock);
+
+	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
+		struct wait_interrupt_data wait_intr_data = {0};
+
+		wait_intr_data.interrupt = interrupt;
+		wait_intr_data.mmg = &hpriv->mem_mgr;
+		wait_intr_data.cq_handle = args->in.cq_counters_handle;
+		wait_intr_data.cq_offset = args->in.cq_counters_offset;
+		wait_intr_data.ts_handle = args->in.timestamp_handle;
+		wait_intr_data.ts_offset = args->in.timestamp_offset;
+		wait_intr_data.target_value = args->in.target;
+		wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
+
+		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
 				!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
-				args->in.timestamp_handle, args->in.timestamp_offset,
 				&status, &timestamp);
-	else
+	} else {
 		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
 				args->in.interrupt_timeout_us, args->in.addr,
 				args->in.target, interrupt, &status,
 				&timestamp);
+	}
+
+	if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT)
+		mutex_unlock(&hpriv->ctx->ts_reg_lock);
+
 	if (rc)
 		return rc;
 
diff --git a/drivers/accel/habanalabs/common/context.c b/drivers/accel/habanalabs/common/context.c
index 0a53f7154739..b83141f58319 100644
--- a/drivers/accel/habanalabs/common/context.c
+++ b/drivers/accel/habanalabs/common/context.c
@@ -119,6 +119,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 		hl_vm_ctx_fini(ctx);
 		hl_asid_free(hdev, ctx->asid);
 		hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
+		mutex_destroy(&ctx->ts_reg_lock);
 	} else {
 		dev_dbg(hdev->dev, "closing kernel context\n");
 		hdev->asic_funcs->ctx_fini(ctx);
@@ -268,6 +269,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 
 		hl_encaps_sig_mgr_init(&ctx->sig_mgr);
 
+		mutex_init(&ctx->ts_reg_lock);
+
 		dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
 			get_task_comm(task_comm, current), ctx->asid);
 	}
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 2f027d5a8206..b6b099e8133c 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1144,6 +1144,7 @@ struct timestamp_reg_work_obj {
  * @buf: pointer to the timestamp buffer which include both user/kernel buffers.
  *       relevant only when doing timestamps records registration.
  * @cq_cb: pointer to CQ counter CB.
+ * @interrupt: interrupt that the node hanged on it's wait list.
  * @timestamp_kernel_addr: timestamp handle address, where to set timestamp
  *                         relevant only when doing timestamps records
  *                         registration.
@@ -1153,10 +1154,11 @@ struct timestamp_reg_work_obj {
  *          allocating records dynamically.
  */
 struct timestamp_reg_info {
-	struct hl_mmap_mem_buf	*buf;
-	struct hl_cb		*cq_cb;
-	u64			*timestamp_kernel_addr;
-	u8			in_use;
+	struct hl_mmap_mem_buf		*buf;
+	struct hl_cb			*cq_cb;
+	struct hl_user_interrupt	*interrupt;
+	u64				*timestamp_kernel_addr;
+	bool				in_use;
 };
 
 /**
@@ -1835,6 +1837,7 @@ struct hl_cs_outcome_store {
  * @va_range: holds available virtual addresses for host and dram mappings.
  * @mem_hash_lock: protects the mem_hash.
  * @hw_block_list_lock: protects the HW block memory list.
+ * @ts_reg_lock: timestamp registration ioctls lock.
  * @debugfs_list: node in debugfs list of contexts.
  * @hw_block_mem_list: list of HW block virtual mapped addresses.
  * @cs_counters: context command submission counters.
@@ -1871,6 +1874,7 @@ struct hl_ctx {
 	struct hl_va_range		*va_range[HL_VA_RANGE_TYPE_MAX];
 	struct mutex			mem_hash_lock;
 	struct mutex			hw_block_list_lock;
+	struct mutex			ts_reg_lock;
 	struct list_head		debugfs_list;
 	struct list_head		hw_block_mem_list;
 	struct hl_cs_counters_atomic	cs_counters;
diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c
index b1010d206c2e..10ac100bf9e2 100644
--- a/drivers/accel/habanalabs/common/irq.c
+++ b/drivers/accel/habanalabs/common/irq.c
@@ -233,7 +233,8 @@ static void hl_ts_free_objects(struct work_struct *work)
  * list to a dedicated workqueue to do the actual put.
  */
 static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
-						struct list_head **free_list, ktime_t now)
+						struct list_head **free_list, ktime_t now,
+						u32 interrupt_id)
 {
 	struct timestamp_reg_free_node *free_node;
 	u64 timestamp;
@@ -255,14 +256,12 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
 
 	*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
 
-	dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
-			pend->ts_reg_info.timestamp_kernel_addr,
-			*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
-
-	list_del(&pend->wait_list_node);
+	dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
+			pend, pend->ts_reg_info.timestamp_kernel_addr, interrupt_id);
 
 	/* Mark kernel CB node as free */
-	pend->ts_reg_info.in_use = 0;
+	pend->ts_reg_info.in_use = false;
+	list_del(&pend->wait_list_node);
 
 	/* Putting the refcount for ts_buff and cq_cb objects will be handled
 	 * in workqueue context, just add job to free_list.
@@ -296,13 +295,15 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
 		return;
 
 	spin_lock(&intr->wait_list_lock);
+
 	list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) {
 		if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
 				!pend->cq_kernel_addr) {
 			if (pend->ts_reg_info.buf) {
 				if (!reg_node_handle_fail) {
 					rc = handle_registration_node(hdev, pend,
-							&ts_reg_free_list_head, intr->timestamp);
+							&ts_reg_free_list_head, intr->timestamp,
+							intr->interrupt_id);
 					if (rc)
 						reg_node_handle_fail = true;
 				}

From 2da9f8d8059331db9d26a5e794a417be547c4236 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Mon, 12 Jun 2023 13:58:11 +0300
Subject: [PATCH 14/77] accel/habanalabs: fix wait_for_interrupt abortion flow

When the driver needs to abort waiters for interrupts, for cases
such as critical events that occur and driver need to do hard reset,
in such scenario the driver will complete the fence to wake up the
waiting thread, and will set the fence error indication.
The return value of the completion API will be greater than 0
since it will return the timeout, but as this indicates successful
completion, the driver should mark it as aborted.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 02ac6d754fba..396bbf8652b7 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -3449,7 +3449,15 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
 								timeout);
 	if (completion_rc > 0) {
-		*status = HL_WAIT_CS_STATUS_COMPLETED;
+		if (pend->fence.error == -EIO) {
+			dev_err_ratelimited(hdev->dev,
+					"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
+					pend->fence.error);
+			rc = -EIO;
+			*status = HL_WAIT_CS_STATUS_ABORTED;
+		} else {
+			*status = HL_WAIT_CS_STATUS_COMPLETED;
+		}
 	} else {
 		if (completion_rc == -ERESTARTSYS) {
 			dev_err_ratelimited(hdev->dev,
@@ -3458,21 +3466,13 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 			rc = -EINTR;
 			*status = HL_WAIT_CS_STATUS_ABORTED;
 		} else {
-			if (pend->fence.error == -EIO) {
-				dev_err_ratelimited(hdev->dev,
-						"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
-						pend->fence.error);
-				rc = -EIO;
-				*status = HL_WAIT_CS_STATUS_ABORTED;
-			} else {
-				/* The wait has timed-out. We don't know anything beyond that
-				 * because the workload wasn't submitted through the driver.
-				 * Therefore, from driver's perspective, the workload is still
-				 * executing.
-				 */
-				rc = 0;
-				*status = HL_WAIT_CS_STATUS_BUSY;
-			}
+			/* The wait has timed-out. We don't know anything beyond that
+			 * because the workload was not submitted through the driver.
+			 * Therefore, from driver's perspective, the workload is still
+			 * executing.
+			 */
+			rc = 0;
+			*status = HL_WAIT_CS_STATUS_BUSY;
 		}
 	}
 

From 89803af5355b04f4e2583d5c7170aa3676f9c1b7 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Mon, 12 Jun 2023 14:22:07 +0300
Subject: [PATCH 15/77] accel/habanalabs: remove pdev check on idle check

Our simulator supports idle check so no need anymore to check if pdev
exists.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 drivers/accel/habanalabs/common/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 0d02f1f7b994..5e61761b8c11 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -424,7 +424,7 @@ static void hpriv_release(struct kref *ref)
 	/* Check the device idle status and reset if not idle.
 	 * Skip it if already in reset, or if device is going to be reset in any case.
 	 */
-	if (!hdev->reset_info.in_reset && !reset_device && hdev->pdev && !hdev->pldm)
+	if (!hdev->reset_info.in_reset && !reset_device && !hdev->pldm)
 		device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
 							HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
 	if (!device_is_idle) {

From 37d72439a4b17dda2adc2de98bcb98932fd6ceb2 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Mon, 12 Jun 2023 14:24:05 +0300
Subject: [PATCH 16/77] accel/habanalabs: reset device if scrubbing failed

If scrubbing memory after user released device has failed it means
the device is in a bad state and should be reset.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 drivers/accel/habanalabs/common/device.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 5e61761b8c11..d7d9198b2103 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -454,8 +454,10 @@ static void hpriv_release(struct kref *ref)
 		/* Scrubbing is handled within hl_device_reset(), so here need to do it directly */
 		int rc = hdev->asic_funcs->scrub_device_mem(hdev);
 
-		if (rc)
+		if (rc) {
 			dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
+			hl_device_reset(hdev, HL_DRV_RESET_HARD);
+		}
 	}
 
 	/* Now we can mark the compute_ctx as not active. Even if a reset is running in a different

From d33c3d0541339fb16e241283f9c1ef369b4a7093 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Mon, 12 Jun 2023 14:51:15 +0300
Subject: [PATCH 17/77] accel/habanalabs: dump temperature threshold boot error

Add dump of an error reported from f/w during boot time.
This error indicates a failure with setting temperature threshold.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/firmware_if.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 370508e98854..c7da69dbfa0a 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -724,6 +724,11 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
 		err_exists = true;
 	}
 
+	if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
+		dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
+		err_exists = true;
+	}
+
 	if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
 		/* Ignore this bit, don't prevent driver loading */
 		dev_dbg(hdev->dev, "device unusable status is set\n");

From e11a7d2ca5cd36b3d1db4d0ccce30c54a29e1ed9 Mon Sep 17 00:00:00 2001
From: Ivan Orlov <ivan.orlov0322@gmail.com>
Date: Tue, 20 Jun 2023 20:25:29 +0200
Subject: [PATCH 18/77] accel: make accel_class a static const structure

Now that the driver core allows for struct class to be in read-only
memory, move the accel_class structure to be declared at build time
placing it into read-only memory, instead of having to be dynamically
allocated at boot time.

Cc: dri-devel@lists.freedesktop.org
Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Ivan Orlov <ivan.orlov0322@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/drm_accel.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c
index 94b4ac12cf24..294b572a9c33 100644
--- a/drivers/accel/drm_accel.c
+++ b/drivers/accel/drm_accel.c
@@ -21,7 +21,6 @@ static DEFINE_SPINLOCK(accel_minor_lock);
 static struct idr accel_minors_idr;
 
 static struct dentry *accel_debugfs_root;
-static struct class *accel_class;
 
 static struct device_type accel_sysfs_device_minor = {
 	.name = "accel_minor"
@@ -32,23 +31,19 @@ static char *accel_devnode(const struct device *dev, umode_t *mode)
 	return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev));
 }
 
+static const struct class accel_class = {
+	.name = "accel",
+	.devnode = accel_devnode,
+};
+
 static int accel_sysfs_init(void)
 {
-	accel_class = class_create("accel");
-	if (IS_ERR(accel_class))
-		return PTR_ERR(accel_class);
-
-	accel_class->devnode = accel_devnode;
-
-	return 0;
+	return class_register(&accel_class);
 }
 
 static void accel_sysfs_destroy(void)
 {
-	if (IS_ERR_OR_NULL(accel_class))
-		return;
-	class_destroy(accel_class);
-	accel_class = NULL;
+	class_unregister(&accel_class);
 }
 
 static int accel_name_info(struct seq_file *m, void *data)
@@ -117,7 +112,7 @@ void accel_debugfs_register(struct drm_device *dev)
 void accel_set_device_instance_params(struct device *kdev, int index)
 {
 	kdev->devt = MKDEV(ACCEL_MAJOR, index);
-	kdev->class = accel_class;
+	kdev->class = &accel_class;
 	kdev->type = &accel_sysfs_device_minor;
 }
 

From b03dc2b621fad55b09126d0e7919b85fdf84a153 Mon Sep 17 00:00:00 2001
From: Juerg Haefliger <juerg.haefliger@canonical.com>
Date: Fri, 16 Jun 2023 14:16:37 +0200
Subject: [PATCH 19/77] accel/habanalabs/gaudi: Add MODULE_FIRMWARE macros

The module loads firmware so add MODULE_FIRMWARE macros to provide that
information via modinfo.

Signed-off-by: Juerg Haefliger <juerg.haefliger@canonical.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi/gaudi.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index 056e2ef44afb..e0f6541eb3d6 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -63,6 +63,10 @@
 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
 
+MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
+MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
+MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
+
 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
 
 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */

From 5a8487ac54019cd597334292f36be4248a97e816 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 15 Jun 2023 15:30:39 +0300
Subject: [PATCH 20/77] accel/habanalabs/gaudi2: un-secure register for engine
 cores interrupt

The F/W dynamically allocates one of the PSOC scratchpad registers for
the engine cores, so they can raise events towards the F/W.
To allow the engine cores to access this register, this register must be
non-secured.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../accel/habanalabs/gaudi2/gaudi2_security.c | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
index 2742b1f801eb..d08267e59303 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
@@ -2907,7 +2907,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
 	 * - range 11: NIC11_CFG + *_DBG (not including TPC_DBG)
 	 *
 	 * If F/W security is not enabled:
-	 * - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP)
+	 * - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP, PSOC_EFUSE and PSOC_GLOBAL_CONF)
 	 */
 	u64 lbw_range_min_short[] = {
 		mmNIC0_TX_AXUSER_BASE,
@@ -2923,7 +2923,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
 		mmNIC10_TX_AXUSER_BASE,
 		mmNIC11_TX_AXUSER_BASE,
 		mmPSOC_I2C_M0_BASE,
-		mmPSOC_EFUSE_BASE
+		mmPSOC_GPIO0_BASE
 	};
 	u64 lbw_range_max_short[] = {
 		mmNIC0_MAC_CH3_MAC_PCS_BASE + HL_BLOCK_SIZE,
@@ -3219,6 +3219,7 @@ static void gaudi2_init_range_registers(struct hl_device *hdev)
  */
 static int gaudi2_init_protection_bits(struct hl_device *hdev)
 {
+	u32 *user_regs_array = NULL, user_regs_array_size = 0, engine_core_intr_reg;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u32 instance_offset;
 	int rc = 0;
@@ -3389,11 +3390,24 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev)
 	/* PSOC.
 	 * Except for PSOC_GLOBAL_CONF, skip when security is enabled in F/W, because the blocks are
 	 * protected by privileged RR.
+	 * For PSOC_GLOBAL_CONF, need to un-secure the scratchpad register which is used for engine
+	 * cores to raise events towards F/W.
 	 */
+	engine_core_intr_reg = (u32) (hdev->asic_prop.engine_core_interrupt_reg_addr - CFG_BASE);
+	if (engine_core_intr_reg >= mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 &&
+			engine_core_intr_reg <= mmPSOC_GLOBAL_CONF_SCRATCHPAD_31) {
+		user_regs_array = &engine_core_intr_reg;
+		user_regs_array_size = 1;
+	} else {
+		dev_err(hdev->dev,
+			"Engine cores register for interrupts (%#x) is not a PSOC scratchpad register\n",
+			engine_core_intr_reg);
+	}
+
 	rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
 			HL_PB_SINGLE_INSTANCE, HL_PB_NA,
 			gaudi2_pb_psoc_global_conf, ARRAY_SIZE(gaudi2_pb_psoc_global_conf),
-			NULL, HL_PB_NA);
+			user_regs_array, user_regs_array_size);
 
 	if (!hdev->asic_prop.fw_security_enabled)
 		rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,

From 1e3a78270b4ec1c8c177eb310c08128d52137a69 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Wed, 28 Jun 2023 14:40:46 +0300
Subject: [PATCH 21/77] accel/habanalabs/gaudi2: unsecure tpc count registers

As TPC kernels now must use those registers we unsecure them.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2_security.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
index d08267e59303..34bf80c5a44b 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
@@ -1601,6 +1601,7 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = {
 	mmDCORE0_TPC0_CFG_KERNEL_SRF_30,
 	mmDCORE0_TPC0_CFG_KERNEL_SRF_31,
 	mmDCORE0_TPC0_CFG_TPC_SB_L0CD,
+	mmDCORE0_TPC0_CFG_TPC_COUNT,
 	mmDCORE0_TPC0_CFG_TPC_ID,
 	mmDCORE0_TPC0_CFG_QM_KERNEL_ID_INC,
 	mmDCORE0_TPC0_CFG_QM_TID_BASE_SIZE_HIGH_DIM_0,

From 15c0bb162353296cd2f897d2fa39d2593fb631b3 Mon Sep 17 00:00:00 2001
From: Igor Grinberg <igrinberg@habana.ai>
Date: Tue, 4 Jul 2023 15:38:40 +0300
Subject: [PATCH 22/77] accel/habanalabs/gaudi2: prepare to remove soft_rst_irq

The soft reset has transitioned to CPUCP packet instead of plain
register write and is about to be removed from the struct cpu_dyn_regs.
As a preparation for removing the gic_host_soft_rst_irq field from
struct cpu_dyn_regs, switch to use the plain macro - this keeps the
backward compatibility.

Signed-off-by: Igor Grinberg <igrinberg@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 1085215ac51d..2b537ef6a550 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -6264,7 +6264,8 @@ static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_perform
 				WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
 			else
 				WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
-			WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
+
+			WREG32(mmGIC_HOST_SOFT_RST_IRQ_POLL_REG,
 				gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
 
 			/* wait for f/w response */

From fa46c7bb501b8b649ba17e4a048243b85ba1b1b0 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Tue, 11 Jul 2023 10:28:18 +0300
Subject: [PATCH 23/77] accel/habanalabs/gaudi2: fix missing check of kernel
 ctx

If we are initializing the kernel context when we have a Gaudi2 device,
we don't need to do any late initializing of that context with
specific Gaudi2 code.

Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 2b537ef6a550..22a9aee9a7c9 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -10651,6 +10651,9 @@ static int gaudi2_ctx_init(struct hl_ctx *ctx)
 {
 	int rc;
 
+	if (ctx->asid == HL_KERNEL_ASID_ID)
+		return 0;
+
 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
 	if (rc)
 		return rc;

From 88872790923e2d80edf29a00b4e440f1473fa8f5 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Tue, 20 Jun 2023 12:09:17 +0300
Subject: [PATCH 24/77] accel/habanalabs: handle f/w reserved dram space
 request

It is possible for FW to request reserved space in dram.
If the device supports this option, it will retrieve the size from the
f/w and will reserve it.

Currently we add the common code infrastructure to support it.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/firmware_if.c        | 5 +++++
 drivers/accel/habanalabs/common/habanalabs.h         | 4 ++++
 drivers/accel/habanalabs/include/common/hl_boot_if.h | 5 +++++
 3 files changed, 14 insertions(+)

diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index c7da69dbfa0a..2bc775d29854 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -2783,6 +2783,11 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 				hdev->decoder_binning, hdev->rotator_binning);
 		}
 
+		if (hdev->asic_prop.support_dynamic_resereved_fw_size) {
+			hdev->asic_prop.reserved_fw_mem_size =
+				le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb);
+		}
+
 		return 0;
 	}
 
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index b6b099e8133c..e69b9b195f48 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -641,6 +641,7 @@ struct hl_hints_range {
  * @glbl_err_cause_num: global err cause number.
  * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
  *                 not supported.
+ * @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
  * @collective_first_sob: first sync object available for collective use
  * @collective_first_mon: first monitor available for collective use
  * @sync_stream_first_sob: first sync object available for sync stream use
@@ -689,6 +690,7 @@ struct hl_hints_range {
  * @dma_mask: the dma mask to be set for this device
  * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
  * @supports_engine_modes: true if changing engines/engine_cores modes is supported.
+ * @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw.
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
@@ -772,6 +774,7 @@ struct asic_fixed_properties {
 	u32				num_of_special_blocks;
 	u32				glbl_err_cause_num;
 	u32				hbw_flush_reg;
+	u32				reserved_fw_mem_size;
 	u16				collective_first_sob;
 	u16				collective_first_mon;
 	u16				sync_stream_first_sob;
@@ -808,6 +811,7 @@ struct asic_fixed_properties {
 	u8				dma_mask;
 	u8				supports_advanced_cpucp_rc;
 	u8				supports_engine_modes;
+	u8				support_dynamic_resereved_fw_size;
 };
 
 /**
diff --git a/drivers/accel/habanalabs/include/common/hl_boot_if.h b/drivers/accel/habanalabs/include/common/hl_boot_if.h
index cff79f7f9f75..7de8a5786a36 100644
--- a/drivers/accel/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/accel/habanalabs/include/common/hl_boot_if.h
@@ -570,6 +570,8 @@ struct lkd_fw_comms_desc {
 	__le64 img_addr;	/* address for next FW component load */
 	struct lkd_fw_binning_info binning_info;
 	struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX];
+	__le32 rsvd_mem_size_mb; /* reserved memory size [MB] for FW/SVE */
+	char reserved1[4];
 };
 
 enum comms_reset_cause {
@@ -596,6 +598,9 @@ struct lkd_fw_comms_msg {
 			__le64 img_addr;
 			struct lkd_fw_binning_info binning_info;
 			struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX];
+			/* reserved memory size [MB] for FW/SVE */
+			__le32 rsvd_mem_size_mb;
+			char reserved1[4];
 		};
 		struct {
 			__u8 reset_cause;

From 10926f60051332a754084d45862afc4e83e597e1 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Tue, 13 Jun 2023 17:29:14 +0300
Subject: [PATCH 25/77] accel/habanalabs: set default device release watchdog
 T/O as 30 sec

After being notified about certain errors, user is expected to finish
his post-errors actions and to release the device within some timeout,
after which is deice is being reset.
The default timeout value is 5 sec, which in some case is not enough for
a user application to collect debug data.
Increase the default value to 30 sec.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index d7d9198b2103..28be0fc325ea 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -18,7 +18,7 @@
 
 #define HL_RESET_DELAY_USEC			10000	/* 10ms */
 
-#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC	5
+#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC	30
 
 enum dma_alloc_type {
 	DMA_ALLOC_COHERENT,

From a8ab1a81ccc2c68a4fa3d0631ce17529e208c8c2 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Tue, 23 May 2023 10:42:19 +0300
Subject: [PATCH 26/77] accel/habanalabs: add info ioctl for engine error
 reports

User gets notification for every engine error report, but he still
lacks the exact engine information. Hence, we allow user to query
for the exact engine reported an error.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c      |  14 ++
 drivers/accel/habanalabs/common/habanalabs.h  |  17 ++
 .../habanalabs/common/habanalabs_ioctl.c      |  25 +++
 drivers/accel/habanalabs/gaudi2/gaudi2.c      | 168 ++++++++++++++++++
 include/uapi/drm/habanalabs_accel.h           |  16 ++
 5 files changed, 240 insertions(+)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 28be0fc325ea..80cce6b74d05 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -2701,6 +2701,20 @@ void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info)
 		*info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
 }
 
+void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count)
+{
+	struct engine_err_info *info = &hdev->captured_err_info.engine_err;
+
+	/* Capture only the first engine error */
+	if (atomic_cmpxchg(&info->event_detected, 0, 1))
+		return;
+
+	info->event.timestamp = ktime_to_ns(ktime_get());
+	info->event.engine_id = engine_id;
+	info->event.error_count = error_count;
+	info->event_info_available = true;
+}
+
 void hl_enable_err_info_capture(struct hl_error_info *captured_err_info)
 {
 	vfree(captured_err_info->page_fault_info.user_mappings);
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index e69b9b195f48..2bd3dedfa4b5 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -3062,6 +3062,20 @@ struct fw_err_info {
 	bool				event_info_available;
 };
 
+/**
+ * struct engine_err_info - engine error information.
+ * @event: holds information on the event.
+ * @event_detected: if set as 1, then an engine event was discovered for the
+ *                  first time after the driver has finished booting-up.
+ * @event_info_available: indicates that an engine event info is now available.
+ */
+struct engine_err_info {
+	struct hl_info_engine_err_event	event;
+	atomic_t			event_detected;
+	bool				event_info_available;
+};
+
+
 /**
  * struct hl_error_info - holds information collected during an error.
  * @cs_timeout: CS timeout error information.
@@ -3070,6 +3084,7 @@ struct fw_err_info {
  * @page_fault_info: page fault information.
  * @hw_err: (fatal) hardware error information.
  * @fw_err: firmware error information.
+ * @engine_err: engine error information.
  */
 struct hl_error_info {
 	struct cs_timeout_info		cs_timeout;
@@ -3078,6 +3093,7 @@ struct hl_error_info {
 	struct page_fault_info		page_fault_info;
 	struct hw_err_info		hw_err;
 	struct fw_err_info		fw_err;
+	struct engine_err_info		engine_err;
 };
 
 /**
@@ -3952,6 +3968,7 @@ void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_
 				u64 *event_mask);
 void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask);
 void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
+void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count);
 void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 549b2518fae0..097d65e493c8 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -875,6 +875,28 @@ static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	return rc ? -EFAULT : 0;
 }
 
+static int engine_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer;
+	struct hl_device *hdev = hpriv->hdev;
+	u32 user_buf_size = args->return_size;
+	struct engine_err_info *info;
+	int rc;
+
+	if (!user_buf)
+		return -EINVAL;
+
+	info = &hdev->captured_err_info.engine_err;
+	if (!info->event_info_available)
+		return 0;
+
+	if (user_buf_size < sizeof(struct hl_info_engine_err_event))
+		return -ENOMEM;
+
+	rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_engine_err_event));
+	return rc ? -EFAULT : 0;
+}
+
 static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args)
 {
 	void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer;
@@ -1001,6 +1023,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_FW_ERR_EVENT:
 		return fw_err_info(hpriv, args);
 
+	case HL_INFO_USER_ENGINE_ERR_EVENT:
+		return engine_err_info(hpriv, args);
+
 	case HL_INFO_DRAM_USAGE:
 		return dram_usage_info(hpriv, args);
 	default:
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 22a9aee9a7c9..c317a95c3b34 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -9589,6 +9589,171 @@ static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
 	}
 }
 
+static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
+{
+	enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX;
+	u16 index;
+
+	switch (event_type) {
+	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
+		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
+		type = GAUDI2_BLOCK_TYPE_TPC;
+		break;
+	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM:
+		index = event_type - GAUDI2_EVENT_TPC0_QM;
+		type = GAUDI2_BLOCK_TYPE_TPC;
+		break;
+	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
+	case GAUDI2_EVENT_MME0_QM:
+		index = 0;
+		type = GAUDI2_BLOCK_TYPE_MME;
+		break;
+	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
+	case GAUDI2_EVENT_MME1_QM:
+		index = 1;
+		type = GAUDI2_BLOCK_TYPE_MME;
+		break;
+	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
+	case GAUDI2_EVENT_MME2_QM:
+		index = 2;
+		type = GAUDI2_BLOCK_TYPE_MME;
+		break;
+	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
+	case GAUDI2_EVENT_MME3_QM:
+		index = 3;
+		type = GAUDI2_BLOCK_TYPE_MME;
+		break;
+	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
+	case GAUDI2_EVENT_KDMA_BM_SPMU:
+	case GAUDI2_EVENT_KDMA0_CORE:
+		return GAUDI2_ENGINE_ID_KDMA;
+	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
+	case GAUDI2_EVENT_PDMA0_CORE:
+	case GAUDI2_EVENT_PDMA0_BM_SPMU:
+	case GAUDI2_EVENT_PDMA0_QM:
+		return GAUDI2_ENGINE_ID_PDMA_0;
+	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
+	case GAUDI2_EVENT_PDMA1_CORE:
+	case GAUDI2_EVENT_PDMA1_BM_SPMU:
+	case GAUDI2_EVENT_PDMA1_QM:
+		return GAUDI2_ENGINE_ID_PDMA_1;
+	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
+		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
+		type = GAUDI2_BLOCK_TYPE_DEC;
+		break;
+	case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU:
+		index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1;
+		type = GAUDI2_BLOCK_TYPE_DEC;
+		break;
+	case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE:
+		index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE;
+		return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
+	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
+		index = event_type - GAUDI2_EVENT_NIC0_QM0;
+		return GAUDI2_ENGINE_ID_NIC0_0 + index;
+	case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR:
+		index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU;
+		return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
+	case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR:
+		index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1;
+		type = GAUDI2_BLOCK_TYPE_TPC;
+		break;
+	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU:
+	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
+		return GAUDI2_ENGINE_ID_ROT_0;
+	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_ROTATOR1_BMON_SPMU:
+	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
+		return GAUDI2_ENGINE_ID_ROT_1;
+	case GAUDI2_EVENT_HDMA0_BM_SPMU:
+	case GAUDI2_EVENT_HDMA0_QM:
+	case GAUDI2_EVENT_HDMA0_CORE:
+		return GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
+	case GAUDI2_EVENT_HDMA1_BM_SPMU:
+	case GAUDI2_EVENT_HDMA1_QM:
+	case GAUDI2_EVENT_HDMA1_CORE:
+		return GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
+	case GAUDI2_EVENT_HDMA2_BM_SPMU:
+	case GAUDI2_EVENT_HDMA2_QM:
+	case GAUDI2_EVENT_HDMA2_CORE:
+		return GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
+	case GAUDI2_EVENT_HDMA3_BM_SPMU:
+	case GAUDI2_EVENT_HDMA3_QM:
+	case GAUDI2_EVENT_HDMA3_CORE:
+		return GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
+	case GAUDI2_EVENT_HDMA4_BM_SPMU:
+	case GAUDI2_EVENT_HDMA4_QM:
+	case GAUDI2_EVENT_HDMA4_CORE:
+		return GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
+	case GAUDI2_EVENT_HDMA5_BM_SPMU:
+	case GAUDI2_EVENT_HDMA5_QM:
+	case GAUDI2_EVENT_HDMA5_CORE:
+		return GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
+	case GAUDI2_EVENT_HDMA6_BM_SPMU:
+	case GAUDI2_EVENT_HDMA6_QM:
+	case GAUDI2_EVENT_HDMA6_CORE:
+		return GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
+	case GAUDI2_EVENT_HDMA7_BM_SPMU:
+	case GAUDI2_EVENT_HDMA7_QM:
+	case GAUDI2_EVENT_HDMA7_CORE:
+		return GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
+	default:
+		break;
+	}
+
+	switch (type) {
+	case GAUDI2_BLOCK_TYPE_TPC:
+		switch (index) {
+		case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5:
+			return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index;
+		case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5:
+			return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0;
+		case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5:
+			return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0;
+		case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5:
+			return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0;
+		default:
+			break;
+		}
+		break;
+	case GAUDI2_BLOCK_TYPE_MME:
+		switch (index) {
+		case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME;
+		case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME;
+		case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME;
+		case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME;
+		default:
+			break;
+		}
+		break;
+	case GAUDI2_BLOCK_TYPE_DEC:
+		switch (index) {
+		case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0;
+		case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1;
+		case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0;
+		case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1;
+		case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0;
+		case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1;
+		case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0;
+		case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1;
+		case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0;
+		case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return U16_MAX;
+}
+
 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 {
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
@@ -10011,6 +10176,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		}
 	}
 
+	if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR)
+		hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count);
+
 	/* Make sure to dump an error in case no error cause was printed so far.
 	 * Note that although we have counted the errors, we use this number as
 	 * a boolean.
diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h
index e6436f3e8ea6..f912869b151e 100644
--- a/include/uapi/drm/habanalabs_accel.h
+++ b/include/uapi/drm/habanalabs_accel.h
@@ -809,6 +809,7 @@ enum hl_server_type {
  * HL_INFO_FW_ERR_EVENT   - Retrieve information on the reported FW error.
  *                          May return 0 even though no new data is available, in that case
  *                          timestamp will be 0.
+ * HL_INFO_USER_ENGINE_ERR_EVENT - Retrieve the last engine id that reported an error.
  */
 #define HL_INFO_HW_IP_INFO			0
 #define HL_INFO_HW_EVENTS			1
@@ -845,6 +846,7 @@ enum hl_server_type {
 #define HL_INFO_FW_GENERIC_REQ			35
 #define HL_INFO_HW_ERR_EVENT			36
 #define HL_INFO_FW_ERR_EVENT			37
+#define HL_INFO_USER_ENGINE_ERR_EVENT		38
 
 #define HL_INFO_VERSION_MAX_LEN			128
 #define HL_INFO_CARD_NAME_MAX_LEN		16
@@ -1226,6 +1228,20 @@ struct hl_info_fw_err_event {
 	__u32 pad;
 };
 
+/**
+ * struct hl_info_engine_err_event - engine error info
+ * @timestamp: time-stamp of error occurrence
+ * @engine_id: engine id who reported the error.
+ * @error_count: Amount of errors reported.
+ * @pad: size padding for u64 granularity.
+ */
+struct hl_info_engine_err_event {
+	__s64 timestamp;
+	__u16 engine_id;
+	__u16 error_count;
+	__u32 pad;
+};
+
 /**
  * struct hl_info_dev_memalloc_page_sizes - valid page sizes in device mem alloc information.
  * @page_order_bitmask: bitmap in which a set bit represents the order of the supported page size

From fe77368c0f3e017ec2567b1f3767c1aa5482919e Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Sun, 19 Feb 2023 11:58:46 +0200
Subject: [PATCH 27/77] accel/habanalabs: register compute device as an accel
 device

Register the compute device as an accel device, and remove the creation
of the habanalabs compute char device.

The IOCTLs in this patch are still handled by the current driver
handler. Moving to DRM IOCTL handling requires moving the IOCTLs
numbers to a specific range, so it will be handled in subsequent
patches.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/debugfs.c     |  22 +--
 drivers/accel/habanalabs/common/device.c      | 163 +++++++-----------
 drivers/accel/habanalabs/common/habanalabs.h  |  53 ++----
 .../accel/habanalabs/common/habanalabs_drv.c  | 157 ++++++++---------
 .../habanalabs/common/habanalabs_ioctl.c      |  12 +-
 drivers/accel/habanalabs/common/memory.c      |   4 +-
 6 files changed, 161 insertions(+), 250 deletions(-)

diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c
index 9e84a47a21dc..01f071d52570 100644
--- a/drivers/accel/habanalabs/common/debugfs.c
+++ b/drivers/accel/habanalabs/common/debugfs.c
@@ -18,8 +18,6 @@
 #define MMU_KBUF_SIZE		(MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
 #define I2C_MAX_TRANSACTION_LEN	8
 
-static struct dentry *hl_debug_root;
-
 static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 				u8 i2c_reg, u8 i2c_len, u64 *val)
 {
@@ -1788,20 +1786,14 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 {
 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
 
-	dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root);
+	dev_entry->root = hdev->drm.accel->debugfs_root;
 
 	add_files_to_device(hdev, dev_entry, dev_entry->root);
+
 	if (!hdev->asic_prop.fw_security_enabled)
 		add_secured_nodes(dev_entry, dev_entry->root);
 }
 
-void hl_debugfs_remove_device(struct hl_device *hdev)
-{
-	struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
-
-	debugfs_remove_recursive(entry->root);
-}
-
 void hl_debugfs_add_file(struct hl_fpriv *hpriv)
 {
 	struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
@@ -1932,13 +1924,3 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
 
 	up_write(&dev_entry->state_dump_sem);
 }
-
-void __init hl_debugfs_init(void)
-{
-	hl_debug_root = debugfs_create_dir("habanalabs", NULL);
-}
-
-void hl_debugfs_fini(void)
-{
-	debugfs_remove_recursive(hl_debug_root);
-}
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 80cce6b74d05..c0c9e9504672 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -14,6 +14,9 @@
 #include <linux/hwmon.h>
 #include <linux/vmalloc.h>
 
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
+
 #include <trace/events/habanalabs.h>
 
 #define HL_RESET_DELAY_USEC			10000	/* 10ms */
@@ -520,24 +523,20 @@ static void print_device_in_use_info(struct hl_device *hdev, const char *message
 }
 
 /*
- * hl_device_release - release function for habanalabs device
- *
- * @inode: pointer to inode structure
- * @filp: pointer to file structure
+ * hl_device_release() - release function for habanalabs device.
+ * @ddev: pointer to DRM device structure.
+ * @file: pointer to DRM file private data structure.
  *
  * Called when process closes an habanalabs device
  */
-static int hl_device_release(struct inode *inode, struct file *filp)
+void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv)
 {
-	struct hl_fpriv *hpriv = filp->private_data;
-	struct hl_device *hdev = hpriv->hdev;
-
-	filp->private_data = NULL;
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
+	struct hl_device *hdev = to_hl_device(ddev);
 
 	if (!hdev) {
 		pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
 		put_pid(hpriv->taskpid);
-		return 0;
 	}
 
 	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
@@ -555,8 +554,6 @@ static int hl_device_release(struct inode *inode, struct file *filp)
 	}
 
 	hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif;
-
-	return 0;
 }
 
 static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
@@ -587,18 +584,8 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-/*
- * hl_mmap - mmap function for habanalabs device
- *
- * @*filp: pointer to file structure
- * @*vma: pointer to vm_area_struct of the process
- *
- * Called when process does an mmap on habanalabs device. Call the relevant mmap
- * function at the end of the common code.
- */
-static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
+static int __hl_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 {
-	struct hl_fpriv *hpriv = filp->private_data;
 	struct hl_device *hdev = hpriv->hdev;
 	unsigned long vm_pgoff;
 
@@ -621,14 +608,22 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
 	return -EINVAL;
 }
 
-static const struct file_operations hl_ops = {
-	.owner = THIS_MODULE,
-	.open = hl_device_open,
-	.release = hl_device_release,
-	.mmap = hl_mmap,
-	.unlocked_ioctl = hl_ioctl,
-	.compat_ioctl = hl_ioctl
-};
+/*
+ * hl_mmap - mmap function for habanalabs device
+ *
+ * @*filp: pointer to file structure
+ * @*vma: pointer to vm_area_struct of the process
+ *
+ * Called when process does an mmap on habanalabs device. Call the relevant mmap
+ * function at the end of the common code.
+ */
+int hl_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *file_priv = filp->private_data;
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
+
+	return __hl_mmap(hpriv, vma);
+}
 
 static const struct file_operations hl_ctrl_ops = {
 	.owner = THIS_MODULE,
@@ -656,7 +651,7 @@ static void device_release_func(struct device *dev)
  *
  * Initialize a cdev and a Linux device for habanalabs's device.
  */
-static int device_init_cdev(struct hl_device *hdev, struct class *class,
+static int device_init_cdev(struct hl_device *hdev, const struct class *class,
 				int minor, const struct file_operations *fops,
 				char *name, struct cdev *cdev,
 				struct device **dev)
@@ -680,23 +675,26 @@ static int device_init_cdev(struct hl_device *hdev, struct class *class,
 
 static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
 {
+	const struct class *accel_class = hdev->drm.accel->kdev->class;
+	char name[32];
 	int rc;
 
-	rc = cdev_device_add(&hdev->cdev, hdev->dev);
-	if (rc) {
-		dev_err(hdev->dev,
-			"failed to add a char device to the system\n");
+	hdev->cdev_idx = hdev->drm.accel->index;
+
+	/* Initialize cdev and device structures for the control device */
+	snprintf(name, sizeof(name), "accel_controlD%d", hdev->cdev_idx);
+	rc = device_init_cdev(hdev, accel_class, hdev->cdev_idx, &hl_ctrl_ops, name,
+				&hdev->cdev_ctrl, &hdev->dev_ctrl);
+	if (rc)
 		return rc;
-	}
 
 	rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
 	if (rc) {
-		dev_err(hdev->dev,
-			"failed to add a control char device to the system\n");
-		goto delete_cdev_device;
+		dev_err(hdev->dev_ctrl,
+			"failed to add an accel control char device to the system\n");
+		goto free_ctrl_device;
 	}
 
-	/* hl_sysfs_init() must be done after adding the device to the system */
 	rc = hl_sysfs_init(hdev);
 	if (rc) {
 		dev_err(hdev->dev, "failed to initialize sysfs\n");
@@ -711,23 +709,19 @@ static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
 
 delete_ctrl_cdev_device:
 	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
-delete_cdev_device:
-	cdev_device_del(&hdev->cdev, hdev->dev);
+free_ctrl_device:
+	put_device(hdev->dev_ctrl);
 	return rc;
 }
 
 static void cdev_sysfs_debugfs_remove(struct hl_device *hdev)
 {
 	if (!hdev->cdev_sysfs_debugfs_created)
-		goto put_devices;
+		return;
 
-	hl_debugfs_remove_device(hdev);
 	hl_sysfs_fini(hdev);
-	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
-	cdev_device_del(&hdev->cdev, hdev->dev);
 
-put_devices:
-	put_device(hdev->dev);
+	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
 	put_device(hdev->dev_ctrl);
 }
 
@@ -2011,51 +2005,6 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
 	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
 }
 
-static int create_cdev(struct hl_device *hdev)
-{
-	char *name;
-	int rc;
-
-	hdev->cdev_idx = hdev->id / 2;
-
-	name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
-	if (!name) {
-		rc = -ENOMEM;
-		goto out_err;
-	}
-
-	/* Initialize cdev and device structures */
-	rc = device_init_cdev(hdev, hdev->hclass, hdev->id, &hl_ops, name,
-				&hdev->cdev, &hdev->dev);
-
-	kfree(name);
-
-	if (rc)
-		goto out_err;
-
-	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
-	if (!name) {
-		rc = -ENOMEM;
-		goto free_dev;
-	}
-
-	/* Initialize cdev and device structures for control device */
-	rc = device_init_cdev(hdev, hdev->hclass, hdev->id_control, &hl_ctrl_ops,
-				name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
-
-	kfree(name);
-
-	if (rc)
-		goto free_dev;
-
-	return 0;
-
-free_dev:
-	put_device(hdev->dev);
-out_err:
-	return rc;
-}
-
 /*
  * hl_device_init - main initialization function for habanalabs device
  *
@@ -2070,14 +2019,10 @@ int hl_device_init(struct hl_device *hdev)
 	int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
 	bool expose_interfaces_on_err = false;
 
-	rc = create_cdev(hdev);
-	if (rc)
-		goto out_disabled;
-
 	/* Initialize ASIC function pointers and perform early init */
 	rc = device_early_init(hdev);
 	if (rc)
-		goto free_dev;
+		goto out_disabled;
 
 	user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
 				hdev->asic_prop.user_interrupt_count;
@@ -2264,6 +2209,14 @@ int hl_device_init(struct hl_device *hdev)
 	 * From here there is no need to expose them in case of an error.
 	 */
 	expose_interfaces_on_err = false;
+
+	rc = drm_dev_register(&hdev->drm, 0);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to register DRM device, rc %d\n", rc);
+		rc = 0;
+		goto out_disabled;
+	}
+
 	rc = cdev_sysfs_debugfs_add(hdev);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to add char devices and sysfs/debugfs files\n");
@@ -2332,15 +2285,14 @@ int hl_device_init(struct hl_device *hdev)
 	kfree(hdev->user_interrupt);
 early_fini:
 	device_early_fini(hdev);
-free_dev:
-	put_device(hdev->dev_ctrl);
-	put_device(hdev->dev);
 out_disabled:
 	hdev->disabled = true;
-	if (expose_interfaces_on_err)
+	if (expose_interfaces_on_err) {
+		drm_dev_register(&hdev->drm, 0);
 		cdev_sysfs_debugfs_add(hdev);
-	dev_err(&hdev->pdev->dev,
-		"Failed to initialize hl%d. Device %s is NOT usable !\n",
+	}
+
+	pr_err("Failed to initialize accel%d. Device %s is NOT usable!\n",
 		hdev->cdev_idx, dev_name(&hdev->pdev->dev));
 
 	return rc;
@@ -2486,6 +2438,7 @@ void hl_device_fini(struct hl_device *hdev)
 
 	/* Hide devices and sysfs/debugfs files from user */
 	cdev_sysfs_debugfs_remove(hdev);
+	drm_dev_unregister(&hdev->drm);
 
 	hl_debugfs_device_fini(hdev);
 
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 2bd3dedfa4b5..1513f747b7d8 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -29,6 +29,9 @@
 #include <linux/coresight.h>
 #include <linux/dma-buf.h>
 
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+
 #include "security.h"
 
 #define HL_NAME				"habanalabs"
@@ -2258,7 +2261,7 @@ struct hl_notifier_event {
 /**
  * struct hl_fpriv - process information stored in FD private data.
  * @hdev: habanalabs device structure.
- * @filp: pointer to the given file structure.
+ * @filp: pointer to the DRM file private data structure.
  * @taskpid: current process ID.
  * @ctx: current executing context. TODO: remove for multiple ctx per process
  * @ctx_mgr: context manager to handle multiple context for this FD.
@@ -2273,7 +2276,7 @@ struct hl_notifier_event {
  */
 struct hl_fpriv {
 	struct hl_device		*hdev;
-	struct file			*filp;
+	struct drm_file			*file_priv;
 	struct pid			*taskpid;
 	struct hl_ctx			*ctx;
 	struct hl_ctx_mgr		ctx_mgr;
@@ -3141,8 +3144,7 @@ struct hl_reset_info {
  *		   (required only for PCI address match mode)
  * @pcie_bar: array of available PCIe bars virtual addresses.
  * @rmmio: configuration area address on SRAM.
- * @hclass: pointer to the habanalabs class.
- * @cdev: related char device.
+ * @drm: related DRM device.
  * @cdev_ctrl: char device for control operations only (INFO IOCTL)
  * @dev: related kernel basic device structure.
  * @dev_ctrl: related kernel device structure for the control device
@@ -3269,8 +3271,7 @@ struct hl_reset_info {
  * @rotator_binning: contains mask of rotators engines that is received from the f/w
  *			which indicates which rotator engines are binned-out(Gaudi3 and above).
  * @id: device minor.
- * @id_control: minor of the control device.
- * @cdev_idx: char device index. Used for setting its name.
+ * @cdev_idx: char device index.
  * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
  *                    addresses.
  * @is_in_dram_scrub: true if dram scrub operation is on going.
@@ -3332,8 +3333,7 @@ struct hl_device {
 	u64				pcie_bar_phys[HL_PCI_NUM_BARS];
 	void __iomem			*pcie_bar[HL_PCI_NUM_BARS];
 	void __iomem			*rmmio;
-	struct class			*hclass;
-	struct cdev			cdev;
+	struct drm_device		drm;
 	struct cdev			cdev_ctrl;
 	struct device			*dev;
 	struct device			*dev_ctrl;
@@ -3442,7 +3442,6 @@ struct hl_device {
 	u32				device_release_watchdog_timeout_sec;
 	u32				rotator_binning;
 	u16				id;
-	u16				id_control;
 	u16				cdev_idx;
 	u16				cpu_pci_msb_addr;
 	u8				is_in_dram_scrub;
@@ -3606,6 +3605,11 @@ static inline bool hl_mem_area_inside_range(u64 address, u64 size,
 	return false;
 }
 
+static inline struct hl_device *to_hl_device(struct drm_device *ddev)
+{
+	return container_of(ddev, struct hl_device, drm);
+}
+
 /**
  * hl_mem_area_crosses_range() - Checks whether address+size crossing a range.
  * @address: The start address of the area we want to validate.
@@ -3644,7 +3648,12 @@ int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
 	enum debugfs_access_type acc_type);
 int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
 			u64 addr, u64 *val, enum debugfs_access_type acc_type);
-int hl_device_open(struct inode *inode, struct file *filp);
+
+int hl_mmap(struct file *filp, struct vm_area_struct *vma);
+
+int hl_device_open(struct drm_device *drm, struct drm_file *file_priv);
+void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv);
+
 int hl_device_open_ctrl(struct inode *inode, struct file *filp);
 bool hl_device_operational(struct hl_device *hdev,
 		enum hl_device_status *status);
@@ -3973,12 +3982,9 @@ void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
 
 #ifdef CONFIG_DEBUG_FS
 
-void hl_debugfs_init(void);
-void hl_debugfs_fini(void);
 int hl_debugfs_device_init(struct hl_device *hdev);
 void hl_debugfs_device_fini(struct hl_device *hdev);
 void hl_debugfs_add_device(struct hl_device *hdev);
-void hl_debugfs_remove_device(struct hl_device *hdev);
 void hl_debugfs_add_file(struct hl_fpriv *hpriv);
 void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
 void hl_debugfs_add_cb(struct hl_cb *cb);
@@ -3997,31 +4003,10 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
 
 #else
 
-static inline void __init hl_debugfs_init(void)
-{
-}
-
-static inline void hl_debugfs_fini(void)
-{
-}
-
-static inline int hl_debugfs_device_init(struct hl_device *hdev)
-{
-	return 0;
-}
-
-static inline void hl_debugfs_device_fini(struct hl_device *hdev)
-{
-}
-
 static inline void hl_debugfs_add_device(struct hl_device *hdev)
 {
 }
 
-static inline void hl_debugfs_remove_device(struct hl_device *hdev)
-{
-}
-
 static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
 {
 }
diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 7263e84c1a4d..6341b8362b3e 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -14,6 +14,10 @@
 #include <linux/pci.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
+#include <linux/version.h>
+
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/habanalabs.h>
@@ -27,7 +31,6 @@ MODULE_DESCRIPTION(HL_DRIVER_DESC);
 MODULE_LICENSE("GPL v2");
 
 static int hl_major;
-static struct class *hl_class;
 static DEFINE_IDR(hl_devs_idr);
 static DEFINE_MUTEX(hl_devs_idr_lock);
 
@@ -70,6 +73,31 @@ static const struct pci_device_id ids[] = {
 };
 MODULE_DEVICE_TABLE(pci, ids);
 
+static const struct file_operations hl_fops = {
+	.owner = THIS_MODULE,
+	.open = accel_open,
+	.release = drm_release,
+	.unlocked_ioctl = hl_ioctl,
+	.compat_ioctl = hl_ioctl,
+	.llseek = noop_llseek,
+	.mmap = hl_mmap
+};
+
+static const struct drm_driver hl_driver = {
+	.driver_features = DRIVER_COMPUTE_ACCEL,
+
+	.name = HL_NAME,
+	.desc = HL_DRIVER_DESC,
+	.major = LINUX_VERSION_MAJOR,
+	.minor = LINUX_VERSION_PATCHLEVEL,
+	.patchlevel = LINUX_VERSION_SUBLEVEL,
+	.date = "20190505",
+
+	.fops = &hl_fops,
+	.open = hl_device_open,
+	.postclose = hl_device_release
+};
+
 /*
  * get_asic_type - translate device id to asic type
  *
@@ -123,43 +151,28 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
 }
 
 /*
- * hl_device_open - open function for habanalabs device
- *
- * @inode: pointer to inode structure
- * @filp: pointer to file structure
+ * hl_device_open() - open function for habanalabs device.
+ * @ddev: pointer to DRM device structure.
+ * @file: pointer to DRM file private data structure.
  *
  * Called when process opens an habanalabs device.
  */
-int hl_device_open(struct inode *inode, struct file *filp)
+int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv)
 {
+	struct hl_device *hdev = to_hl_device(ddev);
 	enum hl_device_status status;
-	struct hl_device *hdev;
 	struct hl_fpriv *hpriv;
 	int rc;
 
-	mutex_lock(&hl_devs_idr_lock);
-	hdev = idr_find(&hl_devs_idr, iminor(inode));
-	mutex_unlock(&hl_devs_idr_lock);
-
-	if (!hdev) {
-		pr_err("Couldn't find device %d:%d\n",
-			imajor(inode), iminor(inode));
-		return -ENXIO;
-	}
-
 	hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
 	if (!hpriv)
 		return -ENOMEM;
 
 	hpriv->hdev = hdev;
-	filp->private_data = hpriv;
-	hpriv->filp = filp;
-
 	mutex_init(&hpriv->notifier_event.lock);
 	mutex_init(&hpriv->restore_phase_mutex);
 	mutex_init(&hpriv->ctx_lock);
 	kref_init(&hpriv->refcount);
-	nonseekable_open(inode, filp);
 
 	hl_ctx_mgr_init(&hpriv->ctx_mgr);
 	hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
@@ -225,6 +238,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
 	hdev->last_successful_open_jif = jiffies;
 	hdev->last_successful_open_ktime = ktime_get();
 
+	file_priv->driver_priv = hpriv;
+	hpriv->file_priv = file_priv;
+
 	return 0;
 
 out_err:
@@ -232,7 +248,6 @@ int hl_device_open(struct inode *inode, struct file *filp)
 	hl_mem_mgr_fini(&hpriv->mem_mgr);
 	hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
 	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
-	filp->private_data = NULL;
 	mutex_destroy(&hpriv->ctx_lock);
 	mutex_destroy(&hpriv->restore_phase_mutex);
 	mutex_destroy(&hpriv->notifier_event.lock);
@@ -268,7 +283,6 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 	 */
 	hpriv->hdev = hdev;
 	filp->private_data = hpriv;
-	hpriv->filp = filp;
 
 	mutex_init(&hpriv->notifier_event.lock);
 	nonseekable_open(inode, filp);
@@ -317,7 +331,6 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
 	hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
 
 	hdev->major = hl_major;
-	hdev->hclass = hl_class;
 	hdev->memory_scrub = memory_scrub;
 	hdev->reset_on_lockup = reset_on_lockup;
 	hdev->boot_error_status_mask = boot_error_status_mask;
@@ -383,6 +396,31 @@ static int fixup_device_params(struct hl_device *hdev)
 	return 0;
 }
 
+static int allocate_device_id(struct hl_device *hdev)
+{
+	int id;
+
+	mutex_lock(&hl_devs_idr_lock);
+	id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
+	mutex_unlock(&hl_devs_idr_lock);
+
+	if (id < 0) {
+		if (id == -ENOSPC)
+			pr_err("too many devices in the system\n");
+		return -EBUSY;
+	}
+
+	hdev->id = id;
+
+	/*
+	 * Firstly initialized with the internal device ID.
+	 * Will be updated later after the DRM device registration to hold the minor ID.
+	 */
+	hdev->cdev_idx = hdev->id;
+
+	return 0;
+}
+
 /**
  * create_hdev - create habanalabs device instance
  *
@@ -395,14 +433,16 @@ static int fixup_device_params(struct hl_device *hdev)
  */
 static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
 {
-	int main_id, ctrl_id = 0, rc = 0;
 	struct hl_device *hdev;
+	int rc;
 
 	*dev = NULL;
 
-	hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
-	if (!hdev)
-		return -ENOMEM;
+	hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm);
+	if (IS_ERR(hdev))
+		return PTR_ERR(hdev);
+
+	hdev->dev = hdev->drm.dev;
 
 	/* Will be NULL in case of simulator device */
 	hdev->pdev = pdev;
@@ -425,7 +465,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
 	if (hdev->asic_type == ASIC_INVALID) {
 		dev_err(&pdev->dev, "Unsupported ASIC\n");
 		rc = -ENODEV;
-		goto free_hdev;
+		goto out_err;
 	}
 
 	copy_kernel_module_params_to_device(hdev);
@@ -434,42 +474,15 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
 
 	fixup_device_params(hdev);
 
-	mutex_lock(&hl_devs_idr_lock);
-
-	/* Always save 2 numbers, 1 for main device and 1 for control.
-	 * They must be consecutive
-	 */
-	main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
-
-	if (main_id >= 0)
-		ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
-					main_id + 2, GFP_KERNEL);
-
-	mutex_unlock(&hl_devs_idr_lock);
-
-	if ((main_id < 0) || (ctrl_id < 0)) {
-		if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
-			pr_err("too many devices in the system\n");
-
-		if (main_id >= 0) {
-			mutex_lock(&hl_devs_idr_lock);
-			idr_remove(&hl_devs_idr, main_id);
-			mutex_unlock(&hl_devs_idr_lock);
-		}
-
-		rc = -EBUSY;
-		goto free_hdev;
-	}
-
-	hdev->id = main_id;
-	hdev->id_control = ctrl_id;
+	rc = allocate_device_id(hdev);
+	if (rc)
+		goto out_err;
 
 	*dev = hdev;
 
 	return 0;
 
-free_hdev:
-	kfree(hdev);
+out_err:
 	return rc;
 }
 
@@ -484,10 +497,8 @@ static void destroy_hdev(struct hl_device *hdev)
 	/* Remove device from the device list */
 	mutex_lock(&hl_devs_idr_lock);
 	idr_remove(&hl_devs_idr, hdev->id);
-	idr_remove(&hl_devs_idr, hdev->id_control);
 	mutex_unlock(&hl_devs_idr_lock);
 
-	kfree(hdev);
 }
 
 static int hl_pmops_suspend(struct device *dev)
@@ -691,28 +702,16 @@ static int __init hl_init(void)
 
 	hl_major = MAJOR(dev);
 
-	hl_class = class_create(HL_NAME);
-	if (IS_ERR(hl_class)) {
-		pr_err("failed to allocate class\n");
-		rc = PTR_ERR(hl_class);
-		goto remove_major;
-	}
-
-	hl_debugfs_init();
-
 	rc = pci_register_driver(&hl_pci_driver);
 	if (rc) {
 		pr_err("failed to register pci device\n");
-		goto remove_debugfs;
+		goto remove_major;
 	}
 
 	pr_debug("driver loaded\n");
 
 	return 0;
 
-remove_debugfs:
-	hl_debugfs_fini();
-	class_destroy(hl_class);
 remove_major:
 	unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
 	return rc;
@@ -725,14 +724,6 @@ static void __exit hl_exit(void)
 {
 	pci_unregister_driver(&hl_pci_driver);
 
-	/*
-	 * Removing debugfs must be after all devices or simulator devices
-	 * have been removed because otherwise we get a bug in the
-	 * debugfs module for referencing NULL objects
-	 */
-	hl_debugfs_fini();
-
-	class_destroy(hl_class);
 	unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
 
 	idr_destroy(&hl_devs_idr);
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 097d65e493c8..28c3793e802f 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -1166,10 +1166,9 @@ static const struct hl_ioctl_desc hl_ioctls_control[] = {
 	HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
 };
 
-static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
-		const struct hl_ioctl_desc *ioctl, struct device *dev)
+static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long arg,
+			const struct hl_ioctl_desc *ioctl, struct device *dev)
 {
-	struct hl_fpriv *hpriv = filep->private_data;
 	unsigned int nr = _IOC_NR(cmd);
 	char stack_kdata[128] = {0};
 	char *kdata = NULL;
@@ -1235,7 +1234,8 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
 
 long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 {
-	struct hl_fpriv *hpriv = filep->private_data;
+	struct drm_file *file_priv = filep->private_data;
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	struct hl_device *hdev = hpriv->hdev;
 	const struct hl_ioctl_desc *ioctl = NULL;
 	unsigned int nr = _IOC_NR(cmd);
@@ -1256,7 +1256,7 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 		return -ENOTTY;
 	}
 
-	return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev);
+	return _hl_ioctl(hpriv, cmd, arg, ioctl, hdev->dev);
 }
 
 long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
@@ -1282,5 +1282,5 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
 		return -ENOTTY;
 	}
 
-	return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl);
+	return _hl_ioctl(hpriv, cmd, arg, ioctl, hdev->dev_ctrl);
 }
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 4fc72a07d2f5..45fdf39bfc8c 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -1818,7 +1818,7 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
 	hl_ctx_put(ctx);
 
 	/* Paired with get_file() in export_dmabuf() */
-	fput(ctx->hpriv->filp);
+	fput(ctx->hpriv->file_priv->filp);
 
 	kfree(hl_dmabuf);
 }
@@ -1864,7 +1864,7 @@ static int export_dmabuf(struct hl_ctx *ctx,
 	 * released first and only then the compute device.
 	 * Paired with fput() in hl_release_dmabuf().
 	 */
-	get_file(ctx->hpriv->filp);
+	get_file(ctx->hpriv->file_priv->filp);
 
 	*dmabuf_fd = fd;
 

From 13312360ef3a7834080ac3b96ed049abfcbf3d4b Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 23 Mar 2023 14:50:00 +0200
Subject: [PATCH 28/77] accel/habanalabs: update sysfs-driver-habanalabs with
 the accel path

Replace "/sys/class/habanalabs/hl<n>/..." with
"/sys/class/accel/accel<n>/device/...".

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../ABI/testing/sysfs-driver-habanalabs       | 64 +++++++++----------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs
index 1b98b6503b23..c63ca1ad500d 100644
--- a/Documentation/ABI/testing/sysfs-driver-habanalabs
+++ b/Documentation/ABI/testing/sysfs-driver-habanalabs
@@ -1,4 +1,4 @@
-What:           /sys/class/habanalabs/hl<n>/armcp_kernel_ver
+What:           /sys/class/accel/accel<n>/device/armcp_kernel_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -6,7 +6,7 @@ Description:    Version of the Linux kernel running on the device's CPU.
                 Will be DEPRECATED in Linux kernel version 5.10, and be
                 replaced with cpucp_kernel_ver
 
-What:           /sys/class/habanalabs/hl<n>/armcp_ver
+What:           /sys/class/accel/accel<n>/device/armcp_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -14,7 +14,7 @@ Description:    Version of the application running on the device's CPU
                 Will be DEPRECATED in Linux kernel version 5.10, and be
                 replaced with cpucp_ver
 
-What:           /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
+What:           /sys/class/accel/accel<n>/device/clk_max_freq_mhz
 Date:           Jun 2019
 KernelVersion:  5.7
 Contact:        ogabbay@kernel.org
@@ -24,58 +24,58 @@ Description:    Allows the user to set the maximum clock frequency, in MHz.
                 frequency value of the device clock. This property is valid
                 only for the Gaudi ASIC family
 
-What:           /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz
+What:           /sys/class/accel/accel<n>/device/clk_cur_freq_mhz
 Date:           Jun 2019
 KernelVersion:  5.7
 Contact:        ogabbay@kernel.org
 Description:    Displays the current frequency, in MHz, of the device clock.
                 This property is valid only for the Gaudi ASIC family
 
-What:           /sys/class/habanalabs/hl<n>/cpld_ver
+What:           /sys/class/accel/accel<n>/device/cpld_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Version of the Device's CPLD F/W
 
-What:           /sys/class/habanalabs/hl<n>/cpucp_kernel_ver
+What:           /sys/class/accel/accel<n>/device/cpucp_kernel_ver
 Date:           Oct 2020
 KernelVersion:  5.10
 Contact:        ogabbay@kernel.org
 Description:    Version of the Linux kernel running on the device's CPU
 
-What:           /sys/class/habanalabs/hl<n>/cpucp_ver
+What:           /sys/class/accel/accel<n>/device/cpucp_ver
 Date:           Oct 2020
 KernelVersion:  5.10
 Contact:        ogabbay@kernel.org
 Description:    Version of the application running on the device's CPU
 
-What:           /sys/class/habanalabs/hl<n>/device_type
+What:           /sys/class/accel/accel<n>/device/device_type
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays the code name of the device according to its type.
                 The supported values are: "GOYA"
 
-What:           /sys/class/habanalabs/hl<n>/eeprom
+What:           /sys/class/accel/accel<n>/device/eeprom
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    A binary file attribute that contains the contents of the
                 on-board EEPROM
 
-What:           /sys/class/habanalabs/hl<n>/fuse_ver
+What:           /sys/class/accel/accel<n>/device/fuse_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays the device's version from the eFuse
 
-What:           /sys/class/habanalabs/hl<n>/fw_os_ver
+What:           /sys/class/accel/accel<n>/device/fw_os_ver
 Date:           Dec 2021
 KernelVersion:  5.18
 Contact:        ogabbay@kernel.org
 Description:    Version of the firmware OS running on the device's CPU
 
-What:           /sys/class/habanalabs/hl<n>/hard_reset
+What:           /sys/class/accel/accel<n>/device/hard_reset
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -83,14 +83,14 @@ Description:    Interface to trigger a hard-reset operation for the device.
                 Hard-reset will reset ALL internal components of the device
                 except for the PCI interface and the internal PLLs
 
-What:           /sys/class/habanalabs/hl<n>/hard_reset_cnt
+What:           /sys/class/accel/accel<n>/device/hard_reset_cnt
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays how many times the device have undergone a hard-reset
                 operation since the driver was loaded
 
-What:           /sys/class/habanalabs/hl<n>/high_pll
+What:           /sys/class/accel/accel<n>/device/high_pll
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -98,7 +98,7 @@ Description:    Allows the user to set the maximum clock frequency for MME, TPC
                 and IC when the power management profile is set to "automatic".
                 This property is valid only for the Goya ASIC family
 
-What:           /sys/class/habanalabs/hl<n>/ic_clk
+What:           /sys/class/accel/accel<n>/device/ic_clk
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -110,27 +110,27 @@ Description:    Allows the user to set the maximum clock frequency, in Hz, of
                 frequency value of the IC. This property is valid only for the
                 Goya ASIC family
 
-What:           /sys/class/habanalabs/hl<n>/ic_clk_curr
+What:           /sys/class/accel/accel<n>/device/ic_clk_curr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays the current clock frequency, in Hz, of the Interconnect
                 fabric. This property is valid only for the Goya ASIC family
 
-What:           /sys/class/habanalabs/hl<n>/infineon_ver
+What:           /sys/class/accel/accel<n>/device/infineon_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI
 
-What:           /sys/class/habanalabs/hl<n>/max_power
+What:           /sys/class/accel/accel<n>/device/max_power
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Allows the user to set the maximum power consumption of the
                 device in milliwatts.
 
-What:           /sys/class/habanalabs/hl<n>/mme_clk
+What:           /sys/class/accel/accel<n>/device/mme_clk
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -142,21 +142,21 @@ Description:    Allows the user to set the maximum clock frequency, in Hz, of
                 frequency value of the MME. This property is valid only for the
                 Goya ASIC family
 
-What:           /sys/class/habanalabs/hl<n>/mme_clk_curr
+What:           /sys/class/accel/accel<n>/device/mme_clk_curr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays the current clock frequency, in Hz, of the MME compute
                 engine. This property is valid only for the Goya ASIC family
 
-What:           /sys/class/habanalabs/hl<n>/pci_addr
+What:           /sys/class/accel/accel<n>/device/pci_addr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays the PCI address of the device. This is needed so the
                 user would be able to open a device based on its PCI address
 
-What:           /sys/class/habanalabs/hl<n>/pm_mng_profile
+What:           /sys/class/accel/accel<n>/device/pm_mng_profile
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -170,19 +170,19 @@ Description:    Power management profile. Values are "auto", "manual". In "auto"
                 ic_clk, mme_clk and tpc_clk. This property is valid only for
                 the Goya ASIC family
 
-What:           /sys/class/habanalabs/hl<n>/preboot_btl_ver
+What:           /sys/class/accel/accel<n>/device/preboot_btl_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Version of the device's preboot F/W code
 
-What:           /sys/class/habanalabs/hl<n>/security_enabled
+What:           /sys/class/accel/accel<n>/device/security_enabled
 Date:           Oct 2022
 KernelVersion:  6.1
 Contact:        obitton@habana.ai
 Description:    Displays the device's security status
 
-What:           /sys/class/habanalabs/hl<n>/soft_reset
+What:           /sys/class/accel/accel<n>/device/soft_reset
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -190,14 +190,14 @@ Description:    Interface to trigger a soft-reset operation for the device.
                 Soft-reset will reset only the compute and DMA engines of the
                 device
 
-What:           /sys/class/habanalabs/hl<n>/soft_reset_cnt
+What:           /sys/class/accel/accel<n>/device/soft_reset_cnt
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays how many times the device have undergone a soft-reset
                 operation since the driver was loaded
 
-What:           /sys/class/habanalabs/hl<n>/status
+What:           /sys/class/accel/accel<n>/device/status
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -215,13 +215,13 @@ Description:    Status of the card:
                     a compute-reset which is executed after a device release
                     (relevant for Gaudi2 only).
 
-What:           /sys/class/habanalabs/hl<n>/thermal_ver
+What:           /sys/class/accel/accel<n>/device/thermal_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Version of the Device's thermal daemon
 
-What:           /sys/class/habanalabs/hl<n>/tpc_clk
+What:           /sys/class/accel/accel<n>/device/tpc_clk
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -233,20 +233,20 @@ Description:    Allows the user to set the maximum clock frequency, in Hz, of
                 frequency value of the TPC. This property is valid only for
                 Goya ASIC family
 
-What:           /sys/class/habanalabs/hl<n>/tpc_clk_curr
+What:           /sys/class/accel/accel<n>/device/tpc_clk_curr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays the current clock frequency, in Hz, of the TPC compute
                 engines. This property is valid only for the Goya ASIC family
 
-What:           /sys/class/habanalabs/hl<n>/uboot_ver
+What:           /sys/class/accel/accel<n>/device/uboot_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Version of the u-boot running on the device's CPU
 
-What:           /sys/class/habanalabs/hl<n>/vrm_ver
+What:           /sys/class/accel/accel<n>/device/vrm_ver
 Date:           Jan 2022
 KernelVersion:  5.17
 Contact:        ogabbay@kernel.org

From 38ed55bc58f72d4180f74fd901e71b16d68641b9 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 23 Mar 2023 14:58:34 +0200
Subject: [PATCH 29/77] accel/habanalabs: update debugfs-driver-habanalabs with
 the accel path

Replace "/sys/kernel/debug/habanalabs/hl<n>/..." with
"/sys/kernel/debug/accel/<n>/...".

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../ABI/testing/debugfs-driver-habanalabs     | 82 +++++++++----------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index df535780058b..042fd125fbc9 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -1,4 +1,4 @@
-What:           /sys/kernel/debug/habanalabs/hl<n>/addr
+What:           /sys/kernel/debug/accel/<n>/addr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -8,34 +8,34 @@ Description:    Sets the device address to be used for read or write through
                 only when the IOMMU is disabled.
                 The acceptable value is a string that starts with "0x"
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/clk_gate
+What:           /sys/kernel/debug/accel/<n>/clk_gate
 Date:           May 2020
 KernelVersion:  5.8
 Contact:        ogabbay@kernel.org
 Description:    This setting is now deprecated as clock gating is handled solely by the f/w
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/command_buffers
+What:           /sys/kernel/debug/accel/<n>/command_buffers
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays a list with information about the currently allocated
                 command buffers
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/command_submission
+What:           /sys/kernel/debug/accel/<n>/command_submission
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays a list with information about the currently active
                 command submissions
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/command_submission_jobs
+What:           /sys/kernel/debug/accel/<n>/command_submission_jobs
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays a list with detailed information about each JOB (CB) of
                 each active command submission
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/data32
+What:           /sys/kernel/debug/accel/<n>/data32
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -50,7 +50,7 @@ Description:    Allows the root user to read or write directly through the
                 If the IOMMU is disabled, it also allows the root user to read
                 or write from the host a device VA of a host mapped memory
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/data64
+What:           /sys/kernel/debug/accel/<n>/data64
 Date:           Jan 2020
 KernelVersion:  5.6
 Contact:        ogabbay@kernel.org
@@ -65,7 +65,7 @@ Description:    Allows the root user to read or write 64 bit data directly
                 If the IOMMU is disabled, it also allows the root user to read
                 or write from the host a device VA of a host mapped memory
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/data_dma
+What:           /sys/kernel/debug/accel/<n>/data_dma
 Date:           Apr 2021
 KernelVersion:  5.13
 Contact:        ogabbay@kernel.org
@@ -79,11 +79,11 @@ Description:    Allows the root user to read from the device's internal
                 a very long time.
                 This interface doesn't support concurrency in the same device.
                 In GAUDI and GOYA, this action can cause undefined behavior
-                in case the it is done while the device is executing user
+                in case it is done while the device is executing user
                 workloads.
                 Only supported on GAUDI at this stage.
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/device
+What:           /sys/kernel/debug/accel/<n>/device
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -91,14 +91,14 @@ Description:    Enables the root user to set the device to specific state.
                 Valid values are "disable", "enable", "suspend", "resume".
                 User can read this property to see the valid values
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/device_release_watchdog_timeout
+What:           /sys/kernel/debug/accel/<n>/device_release_watchdog_timeout
 Date:           Oct 2022
 KernelVersion:  6.2
 Contact:        ttayar@habana.ai
 Description:    The watchdog timeout value in seconds for a device release upon
                 certain error cases, after which the device is reset.
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/dma_size
+What:           /sys/kernel/debug/accel/<n>/dma_size
 Date:           Apr 2021
 KernelVersion:  5.13
 Contact:        ogabbay@kernel.org
@@ -108,7 +108,7 @@ Description:    Specify the size of the DMA transaction when using DMA to read
                 When the write is finished, the user can read the "data_dma"
                 blob
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/dump_razwi_events
+What:           /sys/kernel/debug/accel/<n>/dump_razwi_events
 Date:           Aug 2022
 KernelVersion:  5.20
 Contact:        fkassabri@habana.ai
@@ -117,7 +117,7 @@ Description:    Dumps all razwi events to dmesg if exist.
                 the routine will clear the status register.
                 Usage: cat dump_razwi_events
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
+What:           /sys/kernel/debug/accel/<n>/dump_security_violations
 Date:           Jan 2021
 KernelVersion:  5.12
 Contact:        ogabbay@kernel.org
@@ -125,14 +125,14 @@ Description:    Dumps all security violations to dmesg. This will also ack
                 all security violations meanings those violations will not be
                 dumped next time user calls this API
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/engines
+What:           /sys/kernel/debug/accel/<n>/engines
 Date:           Jul 2019
 KernelVersion:  5.3
 Contact:        ogabbay@kernel.org
 Description:    Displays the status registers values of the device engines and
                 their derived idle status
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_addr
+What:           /sys/kernel/debug/accel/<n>/i2c_addr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -140,7 +140,7 @@ Description:    Sets I2C device address for I2C transaction that is generated
                 by the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_bus
+What:           /sys/kernel/debug/accel/<n>/i2c_bus
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -148,7 +148,7 @@ Description:    Sets I2C bus address for I2C transaction that is generated by
                 the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_data
+What:           /sys/kernel/debug/accel/<n>/i2c_data
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -157,7 +157,7 @@ Description:    Triggers an I2C transaction that is generated by the device's
                 reading from the file generates a read transaction, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_len
+What:           /sys/kernel/debug/accel/<n>/i2c_len
 Date:           Dec 2021
 KernelVersion:  5.17
 Contact:        obitton@habana.ai
@@ -165,7 +165,7 @@ Description:    Sets I2C length in bytes for I2C transaction that is generated b
                 the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
+What:           /sys/kernel/debug/accel/<n>/i2c_reg
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -173,35 +173,35 @@ Description:    Sets I2C register id for I2C transaction that is generated by
                 the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/led0
+What:           /sys/kernel/debug/accel/<n>/led0
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the state of the first S/W led on the device, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/led1
+What:           /sys/kernel/debug/accel/<n>/led1
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the state of the second S/W led on the device, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/led2
+What:           /sys/kernel/debug/accel/<n>/led2
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the state of the third S/W led on the device, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/memory_scrub
+What:           /sys/kernel/debug/accel/<n>/memory_scrub
 Date:           May 2022
 KernelVersion:  5.19
 Contact:        dhirschfeld@habana.ai
 Description:    Allows the root user to scrub the dram memory. The scrubbing
                 value can be set using the debugfs file memory_scrub_val.
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/memory_scrub_val
+What:           /sys/kernel/debug/accel/<n>/memory_scrub_val
 Date:           May 2022
 KernelVersion:  5.19
 Contact:        dhirschfeld@habana.ai
@@ -209,7 +209,7 @@ Description:    The value to which the dram will be set to when the user
                 scrubs the dram using 'memory_scrub' debugfs file and
                 the scrubbing value when using module param 'memory_scrub'
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/mmu
+What:           /sys/kernel/debug/accel/<n>/mmu
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -217,19 +217,19 @@ Description:    Displays the hop values and physical address for a given ASID
                 and virtual address. The user should write the ASID and VA into
                 the file and then read the file to get the result.
                 e.g. to display info about VA 0x1000 for ASID 1 you need to do:
-                echo "1 0x1000" > /sys/kernel/debug/habanalabs/hl0/mmu
+                echo "1 0x1000" > /sys/kernel/debug/accel/0/mmu
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/mmu_error
+What:           /sys/kernel/debug/accel/<n>/mmu_error
 Date:           Mar 2021
 KernelVersion:  5.12
 Contact:        fkassabri@habana.ai
 Description:    Check and display page fault or access violation mmu errors for
                 all MMUs specified in mmu_cap_mask.
                 e.g. to display error info for MMU hw cap bit 9, you need to do:
-                echo "0x200" > /sys/kernel/debug/habanalabs/hl0/mmu_error
-                cat /sys/kernel/debug/habanalabs/hl0/mmu_error
+                echo "0x200" > /sys/kernel/debug/accel/0/mmu_error
+                cat /sys/kernel/debug/accel/0/mmu_error
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/monitor_dump
+What:           /sys/kernel/debug/accel/<n>/monitor_dump
 Date:           Mar 2022
 KernelVersion:  5.19
 Contact:        osharabi@habana.ai
@@ -243,7 +243,7 @@ Description:    Allows the root user to dump monitors status from the device's
                 This interface doesn't support concurrency in the same device.
                 Only supported on GAUDI.
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/monitor_dump_trig
+What:           /sys/kernel/debug/accel/<n>/monitor_dump_trig
 Date:           Mar 2022
 KernelVersion:  5.19
 Contact:        osharabi@habana.ai
@@ -253,14 +253,14 @@ Description:    Triggers dump of monitor data. The value to trigger the operatio
                 When the write is finished, the user can read the "monitor_dump"
                 blob
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/set_power_state
+What:           /sys/kernel/debug/accel/<n>/set_power_state
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the PCI power state. Valid values are "1" for D0 and "2"
                 for D3Hot
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/skip_reset_on_timeout
+What:           /sys/kernel/debug/accel/<n>/skip_reset_on_timeout
 Date:           Jun 2021
 KernelVersion:  5.13
 Contact:        ynudelman@habana.ai
@@ -268,7 +268,7 @@ Description:    Sets the skip reset on timeout option for the device. Value of
                 "0" means device will be reset in case some CS has timed out,
                 otherwise it will not be reset.
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/state_dump
+What:           /sys/kernel/debug/accel/<n>/state_dump
 Date:           Oct 2021
 KernelVersion:  5.15
 Contact:        ynudelman@habana.ai
@@ -279,7 +279,7 @@ Description:    Gets the state dump occurring on a CS timeout or failure.
                 Writing an integer X discards X state dumps, so that the
                 next read would return X+1-st newest state dump.
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
+What:           /sys/kernel/debug/accel/<n>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
 Contact:        ogabbay@kernel.org
@@ -287,21 +287,21 @@ Description:    Sets the stop-on_error option for the device engines. Value of
                 "0" is for disable, otherwise enable.
                 Relevant only for GOYA and GAUDI.
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
+What:           /sys/kernel/debug/accel/<n>/timeout_locked
 Date:           Sep 2021
 KernelVersion:  5.16
 Contact:        obitton@habana.ai
 Description:    Sets the command submission timeout value in seconds.
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/userptr
+What:           /sys/kernel/debug/accel/<n>/userptr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
-Description:    Displays a list with information about the currently user
+Description:    Displays a list with information about the current user
                 pointers (user virtual addresses) that are pinned and mapped
                 to DMA addresses
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
+What:           /sys/kernel/debug/accel/<n>/userptr_lookup
 Date:           Oct 2021
 KernelVersion:  5.15
 Contact:        ogabbay@kernel.org
@@ -309,7 +309,7 @@ Description:    Allows to search for specific user pointers (user virtual
                 addresses) that are pinned and mapped to DMA addresses, and see
                 their resolution to the specific dma address.
 
-What:           /sys/kernel/debug/habanalabs/hl<n>/vm
+What:           /sys/kernel/debug/accel/<n>/vm
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org

From 57963ff8adfee6f2a777bc44c372a390af535300 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 17 Apr 2023 17:05:34 +0300
Subject: [PATCH 30/77] accel/habanalabs: Move ioctls to the device specific
 ioctls range

To use drm_ioctl(), move the ioctls to the device specific ioctls
range at [DRM_COMMAND_BASE, DRM_COMMAND_END).

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../accel/habanalabs/common/command_buffer.c  |  5 +-
 .../habanalabs/common/command_submission.c    |  6 ++-
 drivers/accel/habanalabs/common/habanalabs.h  | 11 ++--
 .../accel/habanalabs/common/habanalabs_drv.c  | 18 +++++--
 .../habanalabs/common/habanalabs_ioctl.c      | 53 ++++---------------
 drivers/accel/habanalabs/common/memory.c      |  3 +-
 include/uapi/drm/habanalabs_accel.h           | 39 +++++++-------
 7 files changed, 59 insertions(+), 76 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_buffer.c b/drivers/accel/habanalabs/common/command_buffer.c
index 08f7aee42624..0f0d295116e7 100644
--- a/drivers/accel/habanalabs/common/command_buffer.c
+++ b/drivers/accel/habanalabs/common/command_buffer.c
@@ -361,10 +361,11 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
 	return rc;
 }
 
-int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
-	union hl_cb_args *args = data;
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	struct hl_device *hdev = hpriv->hdev;
+	union hl_cb_args *args = data;
 	u64 handle = 0, device_va = 0;
 	enum hl_device_status status;
 	u32 usage_cnt = 0;
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 396bbf8652b7..daaa3cbe0b12 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -2558,8 +2558,9 @@ static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
 	return 0;
 }
 
-int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	union hl_cs_args *args = data;
 	enum hl_cs_type cs_type = 0;
 	u64 cs_seq = ULONG_MAX;
@@ -3718,8 +3719,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	return 0;
 }
 
-int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	struct hl_device *hdev = hpriv->hdev;
 	union hl_wait_cs_args *args = data;
 	u32 flags = args->in.flags;
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 1513f747b7d8..efb046370f2e 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -4118,11 +4118,12 @@ void hl_ack_pb_single_dcore(struct hl_device *hdev, u32 dcore_offset,
 		const u32 pb_blocks[], u32 blocks_array_size);
 
 /* IOCTLs */
-long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
-int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
+int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
+int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
+int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
+int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
+int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
 
 #endif /* HABANALABSP_H_ */
diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 6341b8362b3e..7e66f623f350 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -18,6 +18,7 @@
 
 #include <drm/drm_accel.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_ioctl.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/habanalabs.h>
@@ -73,12 +74,21 @@ static const struct pci_device_id ids[] = {
 };
 MODULE_DEVICE_TABLE(pci, ids);
 
+static const struct drm_ioctl_desc hl_drm_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(HL_INFO, hl_info_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(HL_MEMORY, hl_mem_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(HL_DEBUG, hl_debug_ioctl, 0),
+};
+
 static const struct file_operations hl_fops = {
 	.owner = THIS_MODULE,
 	.open = accel_open,
 	.release = drm_release,
-	.unlocked_ioctl = hl_ioctl,
-	.compat_ioctl = hl_ioctl,
+	.unlocked_ioctl = drm_ioctl,
+	.compat_ioctl = drm_compat_ioctl,
 	.llseek = noop_llseek,
 	.mmap = hl_mmap
 };
@@ -95,7 +105,9 @@ static const struct drm_driver hl_driver = {
 
 	.fops = &hl_fops,
 	.open = hl_device_open,
-	.postclose = hl_device_release
+	.postclose = hl_device_release,
+	.ioctls = hl_drm_ioctls,
+	.num_ioctls = ARRAY_SIZE(hl_drm_ioctls)
 };
 
 /*
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 28c3793e802f..87a6a0c0c48a 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -1095,8 +1095,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	return rc;
 }
 
-static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
+
 	return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
 }
 
@@ -1105,10 +1107,11 @@ static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
 	return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
 }
 
-static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
-	struct hl_debug_args *args = data;
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	struct hl_device *hdev = hpriv->hdev;
+	struct hl_debug_args *args = data;
 	enum hl_device_status status;
 
 	int rc = 0;
@@ -1151,19 +1154,10 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
 }
 
 #define HL_IOCTL_DEF(ioctl, _func) \
-	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
-
-static const struct hl_ioctl_desc hl_ioctls[] = {
-	HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
-	HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
-	HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
-	HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl),
-	HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
-	HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
-};
+	[_IOC_NR(ioctl) - HL_COMMAND_START] = {.cmd = ioctl, .func = _func}
 
 static const struct hl_ioctl_desc hl_ioctls_control[] = {
-	HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
+	HL_IOCTL_DEF(DRM_IOCTL_HL_INFO, hl_info_ioctl_control)
 };
 
 static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long arg,
@@ -1232,33 +1226,6 @@ static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long ar
 	return retcode;
 }
 
-long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
-{
-	struct drm_file *file_priv = filep->private_data;
-	struct hl_fpriv *hpriv = file_priv->driver_priv;
-	struct hl_device *hdev = hpriv->hdev;
-	const struct hl_ioctl_desc *ioctl = NULL;
-	unsigned int nr = _IOC_NR(cmd);
-
-	if (!hdev) {
-		pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n");
-		return -ENODEV;
-	}
-
-	if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
-		ioctl = &hl_ioctls[nr];
-	} else {
-		char task_comm[TASK_COMM_LEN];
-
-		dev_dbg_ratelimited(hdev->dev,
-				"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
-				task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
-		return -ENOTTY;
-	}
-
-	return _hl_ioctl(hpriv, cmd, arg, ioctl, hdev->dev);
-}
-
 long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
 {
 	struct hl_fpriv *hpriv = filep->private_data;
@@ -1271,8 +1238,8 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
 		return -ENODEV;
 	}
 
-	if (nr == _IOC_NR(HL_IOCTL_INFO)) {
-		ioctl = &hl_ioctls_control[nr];
+	if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) {
+		ioctl = &hl_ioctls_control[nr - HL_COMMAND_START];
 	} else {
 		char task_comm[TASK_COMM_LEN];
 
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 45fdf39bfc8c..1b1b4256b011 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -2171,8 +2171,9 @@ static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in
 	return 0;
 }
 
-int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	enum hl_device_status status;
 	union hl_mem_args *args = data;
 	struct hl_device *hdev = hpriv->hdev;
diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h
index f912869b151e..e7893b082bf8 100644
--- a/include/uapi/drm/habanalabs_accel.h
+++ b/include/uapi/drm/habanalabs_accel.h
@@ -8,8 +8,7 @@
 #ifndef HABANALABS_H_
 #define HABANALABS_H_
 
-#include <linux/types.h>
-#include <linux/ioctl.h>
+#include <drm/drm.h>
 
 /*
  * Defines that are asic-specific but constitutes as ABI between kernel driver
@@ -607,9 +606,9 @@ enum gaudi2_engine_id {
 /*
  * ASIC specific PLL index
  *
- * Used to retrieve in frequency info of different IPs via
- * HL_INFO_PLL_FREQUENCY under HL_IOCTL_INFO IOCTL. The enums need to be
- * used as an index in struct hl_pll_frequency_info
+ * Used to retrieve in frequency info of different IPs via HL_INFO_PLL_FREQUENCY under
+ * DRM_IOCTL_HL_INFO IOCTL.
+ * The enums need to be used as an index in struct hl_pll_frequency_info.
  */
 
 enum hl_goya_pll_index {
@@ -2163,6 +2162,13 @@ struct hl_debug_args {
 	__u32 ctx_id;
 };
 
+#define HL_IOCTL_INFO		0x00
+#define HL_IOCTL_CB		0x01
+#define HL_IOCTL_CS		0x02
+#define HL_IOCTL_WAIT_CS	0x03
+#define HL_IOCTL_MEMORY		0x04
+#define HL_IOCTL_DEBUG		0x05
+
 /*
  * Various information operations such as:
  * - H/W IP information
@@ -2177,8 +2183,7 @@ struct hl_debug_args {
  * definitions of structures in kernel and userspace, e.g. in case of old
  * userspace and new kernel driver
  */
-#define HL_IOCTL_INFO	\
-		_IOWR('H', 0x01, struct hl_info_args)
+#define DRM_IOCTL_HL_INFO	DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_INFO, struct hl_info_args)
 
 /*
  * Command Buffer
@@ -2199,8 +2204,7 @@ struct hl_debug_args {
  * and won't be returned to user.
  *
  */
-#define HL_IOCTL_CB		\
-		_IOWR('H', 0x02, union hl_cb_args)
+#define DRM_IOCTL_HL_CB		DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_CB, union hl_cb_args)
 
 /*
  * Command Submission
@@ -2252,8 +2256,7 @@ struct hl_debug_args {
  * and only if CS N and CS N-1 are exactly the same (same CBs for the same
  * queues).
  */
-#define HL_IOCTL_CS			\
-		_IOWR('H', 0x03, union hl_cs_args)
+#define DRM_IOCTL_HL_CS		DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_CS, union hl_cs_args)
 
 /*
  * Wait for Command Submission
@@ -2285,9 +2288,7 @@ struct hl_debug_args {
  * HL_WAIT_CS_STATUS_ABORTED     - The CS was aborted, usually because the
  *                                 device was reset (EIO)
  */
-
-#define HL_IOCTL_WAIT_CS			\
-		_IOWR('H', 0x04, union hl_wait_cs_args)
+#define DRM_IOCTL_HL_WAIT_CS	DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_WAIT_CS, union hl_wait_cs_args)
 
 /*
  * Memory
@@ -2304,8 +2305,7 @@ struct hl_debug_args {
  * There is an option for the user to specify the requested virtual address.
  *
  */
-#define HL_IOCTL_MEMORY		\
-		_IOWR('H', 0x05, union hl_mem_args)
+#define DRM_IOCTL_HL_MEMORY	DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_MEMORY, union hl_mem_args)
 
 /*
  * Debug
@@ -2331,10 +2331,9 @@ struct hl_debug_args {
  * The driver can decide to "kick out" the user if he abuses this interface.
  *
  */
-#define HL_IOCTL_DEBUG		\
-		_IOWR('H', 0x06, struct hl_debug_args)
+#define DRM_IOCTL_HL_DEBUG	DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_DEBUG, struct hl_debug_args)
 
-#define HL_COMMAND_START	0x01
-#define HL_COMMAND_END		0x07
+#define HL_COMMAND_START	(DRM_COMMAND_BASE + HL_IOCTL_INFO)
+#define HL_COMMAND_END		(DRM_COMMAND_BASE + HL_IOCTL_DEBUG + 1)
 
 #endif /* HABANALABS_H_ */

From 01ab1629ad7024ce974015e206555955921b16bc Mon Sep 17 00:00:00 2001
From: Igor Grinberg <igrinberg@habana.ai>
Date: Tue, 4 Jul 2023 15:38:40 +0300
Subject: [PATCH 31/77] accel/habanalabs/gaudi2: prepare to remove
 cpu_rst_status

The soft reset has transitioned to CPUCP packet instead of plain
register write and is about to be removed from the struct cpu_dyn_regs.
As a preparation for removing the cpu_rst_status field from
struct cpu_dyn_regs, switch to use the plain macro - this keeps the
backward compatibility.

Signed-off-by: Igor Grinberg <igrinberg@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index c317a95c3b34..d94acec63d95 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -6254,16 +6254,12 @@ static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_
 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
 						u32 poll_timeout_us)
 {
-	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
-	int rc = 0;
+	int rc;
 
 	if (!driver_performs_reset) {
 		if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
 			/* set SP to indicate reset request sent to FW */
-			if (dyn_regs->cpu_rst_status)
-				WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
-			else
-				WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
+			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
 
 			WREG32(mmGIC_HOST_SOFT_RST_IRQ_POLL_REG,
 				gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);

From cf1ed52d12097b743da17dfa34db01f2c18537a5 Mon Sep 17 00:00:00 2001
From: Benjamin Dotan <bdotan@habana.ai>
Date: Thu, 20 Jul 2023 15:32:07 +0300
Subject: [PATCH 32/77] accel/habanalabs/gaudi2 : remove psoc_arc access

Because firmware is blocking PSOC_ARC_DBG, we need to disable access
to this block.

Signed-off-by: Benjamin Dotan <bdotan@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/gaudi2/gaudi2_coresight.c      | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
index 25b5368f37dd..3e90bc969264 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
@@ -151,8 +151,8 @@ static u64 debug_stm_regs[GAUDI2_STM_LAST + 1] = {
 	[GAUDI2_STM_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_STM_BASE,
 	[GAUDI2_STM_PCIE] = mmPCIE_STM_BASE,
 	[GAUDI2_STM_PSOC] = mmPSOC_STM_BASE,
-	[GAUDI2_STM_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_STM_BASE,
-	[GAUDI2_STM_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_STM_BASE,
+	[GAUDI2_STM_PSOC_ARC0_CS] = 0,
+	[GAUDI2_STM_PSOC_ARC1_CS] = 0,
 	[GAUDI2_STM_PDMA0_CS] = mmPDMA0_CS_STM_BASE,
 	[GAUDI2_STM_PDMA1_CS] = mmPDMA1_CS_STM_BASE,
 	[GAUDI2_STM_CPU] = mmCPU_STM_BASE,
@@ -293,8 +293,8 @@ static u64 debug_etf_regs[GAUDI2_ETF_LAST + 1] = {
 	[GAUDI2_ETF_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_ETF_BASE,
 	[GAUDI2_ETF_PCIE] = mmPCIE_ETF_BASE,
 	[GAUDI2_ETF_PSOC] = mmPSOC_ETF_BASE,
-	[GAUDI2_ETF_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_ETF_BASE,
-	[GAUDI2_ETF_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_ETF_BASE,
+	[GAUDI2_ETF_PSOC_ARC0_CS] = 0,
+	[GAUDI2_ETF_PSOC_ARC1_CS] = 0,
 	[GAUDI2_ETF_PDMA0_CS] = mmPDMA0_CS_ETF_BASE,
 	[GAUDI2_ETF_PDMA1_CS] = mmPDMA1_CS_ETF_BASE,
 	[GAUDI2_ETF_CPU_0] = mmCPU_ETF_0_BASE,
@@ -436,8 +436,8 @@ static u64 debug_funnel_regs[GAUDI2_FUNNEL_LAST + 1] = {
 	[GAUDI2_FUNNEL_DCORE3_RTR6] = mmDCORE3_RTR6_FUNNEL_BASE,
 	[GAUDI2_FUNNEL_DCORE3_RTR7] = mmDCORE3_RTR7_FUNNEL_BASE,
 	[GAUDI2_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE,
-	[GAUDI2_FUNNEL_PSOC_ARC0] = mmPSOC_ARC0_FUNNEL_BASE,
-	[GAUDI2_FUNNEL_PSOC_ARC1] = mmPSOC_ARC1_FUNNEL_BASE,
+	[GAUDI2_FUNNEL_PSOC_ARC0] = 0,
+	[GAUDI2_FUNNEL_PSOC_ARC1] = 0,
 	[GAUDI2_FUNNEL_XDMA] = mmXDMA_FUNNEL_BASE,
 	[GAUDI2_FUNNEL_CPU] = mmCPU_FUNNEL_BASE,
 	[GAUDI2_FUNNEL_PMMU] = mmPMMU_FUNNEL_BASE,
@@ -766,10 +766,10 @@ static u64 debug_bmon_regs[GAUDI2_BMON_LAST + 1] = {
 	[GAUDI2_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE,
 	[GAUDI2_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE,
 	[GAUDI2_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE,
-	[GAUDI2_BMON_PSOC_ARC0_0] = mmPSOC_ARC0_BMON_0_BASE,
-	[GAUDI2_BMON_PSOC_ARC0_1] = mmPSOC_ARC0_BMON_1_BASE,
-	[GAUDI2_BMON_PSOC_ARC1_0] = mmPSOC_ARC1_BMON_0_BASE,
-	[GAUDI2_BMON_PSOC_ARC1_1] = mmPSOC_ARC1_BMON_1_BASE,
+	[GAUDI2_BMON_PSOC_ARC0_0] = 0,
+	[GAUDI2_BMON_PSOC_ARC0_1] = 0,
+	[GAUDI2_BMON_PSOC_ARC1_0] = 0,
+	[GAUDI2_BMON_PSOC_ARC1_1] = 0,
 	[GAUDI2_BMON_PDMA0_0] = mmPDMA0_BMON_0_BASE,
 	[GAUDI2_BMON_PDMA0_1] = mmPDMA0_BMON_1_BASE,
 	[GAUDI2_BMON_PDMA1_0] = mmPDMA1_BMON_0_BASE,
@@ -968,8 +968,8 @@ static u64 debug_spmu_regs[GAUDI2_SPMU_LAST + 1] = {
 	[GAUDI2_SPMU_DCORE3_VDEC0_CS] = mmDCORE3_VDEC0_CS_SPMU_BASE,
 	[GAUDI2_SPMU_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_SPMU_BASE,
 	[GAUDI2_SPMU_PCIE] = mmPCIE_SPMU_BASE,
-	[GAUDI2_SPMU_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_SPMU_BASE,
-	[GAUDI2_SPMU_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_SPMU_BASE,
+	[GAUDI2_SPMU_PSOC_ARC0_CS] = 0,
+	[GAUDI2_SPMU_PSOC_ARC1_CS] = 0,
 	[GAUDI2_SPMU_PDMA0_CS] = mmPDMA0_CS_SPMU_BASE,
 	[GAUDI2_SPMU_PDMA1_CS] = mmPDMA1_CS_SPMU_BASE,
 	[GAUDI2_SPMU_PMMU_CS] = mmPMMU_CS_SPMU_BASE,

From 428f6882a6f9fc0521eb2dca5293436d5a041ff3 Mon Sep 17 00:00:00 2001
From: Benjamin Dotan <bdotan@habana.ai>
Date: Thu, 20 Jul 2023 13:03:43 +0300
Subject: [PATCH 33/77] accel/habanalabs: fix ETR/ETF flush logic

When config_etr or config_etf are called we need to validate the
parameters that are passed into them to make sure the requested
operation is valid.

Signed-off-by: Benjamin Dotan <bdotan@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi/gaudi_coresight.c   | 12 ++++++++++++
 drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c | 10 ++++++++++
 drivers/accel/habanalabs/goya/goya_coresight.c     | 10 ++++++++++
 3 files changed, 32 insertions(+)

diff --git a/drivers/accel/habanalabs/gaudi/gaudi_coresight.c b/drivers/accel/habanalabs/gaudi/gaudi_coresight.c
index 3455b14554c6..1168fefa33f4 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi_coresight.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi_coresight.c
@@ -482,6 +482,11 @@ static int gaudi_config_etf(struct hl_device *hdev,
 
 	WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
 
+	val = RREG32(base_reg + 0x20);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
 	val = RREG32(base_reg + 0x304);
 	val |= 0x1000;
 	WREG32(base_reg + 0x304, val);
@@ -580,6 +585,13 @@ static int gaudi_config_etr(struct hl_device *hdev,
 
 	WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
 
+	val = RREG32(mmPSOC_ETR_CTL);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
+
+
 	val = RREG32(mmPSOC_ETR_FFCR);
 	val |= 0x1000;
 	WREG32(mmPSOC_ETR_FFCR, val);
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
index 3e90bc969264..32e0f1a85b35 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
@@ -2092,6 +2092,11 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
 	if (rc)
 		return -EIO;
 
+	val = RREG32(base_reg + mmETF_CTL_OFFSET);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
 	val = RREG32(base_reg + mmETF_FFCR_OFFSET);
 	val |= 0x1000;
 	WREG32(base_reg + mmETF_FFCR_OFFSET, val);
@@ -2189,6 +2194,11 @@ static int gaudi2_config_etr(struct hl_device *hdev, struct hl_ctx *ctx,
 	if (rc)
 		return -EIO;
 
+	val = RREG32(mmPSOC_ETR_CTL);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
 	val = RREG32(mmPSOC_ETR_FFCR);
 	val |= 0x1000;
 	WREG32(mmPSOC_ETR_FFCR, val);
diff --git a/drivers/accel/habanalabs/goya/goya_coresight.c b/drivers/accel/habanalabs/goya/goya_coresight.c
index a6d6cc38bcd8..41cae5fd843b 100644
--- a/drivers/accel/habanalabs/goya/goya_coresight.c
+++ b/drivers/accel/habanalabs/goya/goya_coresight.c
@@ -315,6 +315,11 @@ static int goya_config_etf(struct hl_device *hdev,
 
 	WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
 
+	val = RREG32(base_reg + 0x20);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
 	val = RREG32(base_reg + 0x304);
 	val |= 0x1000;
 	WREG32(base_reg + 0x304, val);
@@ -386,6 +391,11 @@ static int goya_config_etr(struct hl_device *hdev,
 
 	WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
 
+	val = RREG32(mmPSOC_ETR_CTL);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
 	val = RREG32(mmPSOC_ETR_FFCR);
 	val |= 0x1000;
 	WREG32(mmPSOC_ETR_FFCR, val);

From a45d5cf09d49ed46c1cf1150ed5a891878a161f3 Mon Sep 17 00:00:00 2001
From: Justin Stitt <justinstitt@google.com>
Date: Fri, 25 Aug 2023 22:09:51 +0000
Subject: [PATCH 34/77] accel/habanalabs: refactor deprecated strncpy to
 strscpy_pad

`strncpy` is deprecated for use on NUL-terminated destination strings [1].

We see that `prop->cpucp_info.card_name` is supposed to be
NUL-terminated based on its usage within `__hwmon_device_register()`
(wherein it's called "name"):
|	if (name && (!strlen(name) || strpbrk(name, "-* \t\n")))
|		dev_warn(dev,
|			 "hwmon: '%s' is not a valid name attribute, please fix\n",
|			 name);

A suitable replacement is `strscpy_pad` [2] due to the fact that it
guarantees both NUL-termination and NUL-padding on its destination
buffer.

NUL-padding on `prop->cpucp_info.card_name` is not strictly necessary as
`hdev->prop` is explicitly zero-initialized but should be used
regardless as it gets copied out to userspace directly -- as per Kees'
suggestion.

Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1]
Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2]
Link: https://github.com/KSPP/linux/issues/90
Cc: linux-hardening@vger.kernel.org
Signed-off-by: Justin Stitt <justinstitt@google.com>
Suggested-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi/gaudi.c   | 4 ++--
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 5 +++--
 drivers/accel/habanalabs/goya/goya.c     | 4 ++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index e0f6541eb3d6..29fb71c8b47f 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -664,7 +664,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 
-	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
+	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
 					CARD_NAME_MAX_LEN);
 
 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
@@ -8004,7 +8004,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
 		return rc;
 
 	if (!strlen(prop->cpucp_info.card_name))
-		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
+		strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
 				CARD_NAME_MAX_LEN);
 
 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index d94acec63d95..e53de95b1791 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -2528,7 +2528,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 
-	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
+	strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
 
 	prop->mme_master_slave_mode = 1;
 
@@ -2981,7 +2981,8 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev)
 	}
 
 	if (!strlen(prop->cpucp_info.card_name))
-		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
+		strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME,
+				CARD_NAME_MAX_LEN);
 
 	/* Overwrite binning masks with the actual binning values from F/W */
 	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
index 7c685e6075f6..024ccf2e159b 100644
--- a/drivers/accel/habanalabs/goya/goya.c
+++ b/drivers/accel/habanalabs/goya/goya.c
@@ -466,7 +466,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 
-	strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
+	strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
 		CARD_NAME_MAX_LEN);
 
 	prop->max_pending_cs = GOYA_MAX_PENDING_CS;
@@ -5122,7 +5122,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
 	}
 
 	if (!strlen(prop->cpucp_info.card_name))
-		strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
+		strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
 				CARD_NAME_MAX_LEN);
 
 	return 0;

From 90f3de616259cbb5666f00f8daf5420cd7d71d18 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 4 Sep 2023 21:18:36 +0200
Subject: [PATCH 35/77] accel/habanalabs/gaudi2: Fix incorrect string length
 computation in gaudi2_psoc_razwi_get_engines()

snprintf() returns the "number of characters which *would* be generated for
the given input", not the size *really* generated.

In order to avoid too large values for 'str_size' (and potential negative
values for "PSOC_RAZWI_ENG_STR_SIZE - str_size") use scnprintf()
instead of snprintf().

Fixes: c0e6df916050 ("accel/habanalabs: fix address decode RAZWI handling")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index e53de95b1791..d60389b6700f 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -8278,11 +8278,11 @@ static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u
 		eng_id[num_of_eng] = razwi_info[i].eng_id;
 		base[num_of_eng] = razwi_info[i].rtr_ctrl;
 		if (!num_of_eng)
-			str_size += snprintf(eng_name + str_size,
+			str_size += scnprintf(eng_name + str_size,
 						PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
 						razwi_info[i].eng_name);
 		else
-			str_size += snprintf(eng_name + str_size,
+			str_size += scnprintf(eng_name + str_size,
 						PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
 						razwi_info[i].eng_name);
 		num_of_eng++;

From 571bfeb48ac24564658e58c0a3a5318904846aae Mon Sep 17 00:00:00 2001
From: Justin Stitt <justinstitt@google.com>
Date: Wed, 23 Aug 2023 00:23:08 +0000
Subject: [PATCH 36/77] accel/habanalabs: refactor deprecated strncpy

`strncpy` is deprecated for use on NUL-terminated destination strings [1].

A suitable replacement is `strscpy` [2] due to the fact that it
guarantees NUL-termination on its destination buffer argument which is
_not_ the case for `strncpy`!

There is likely no bug happening in this case since HL_STR_MAX is
strictly larger than all source strings. Nonetheless, prefer a safer and
more robust interface.

It should also be noted that `strscpy` will not pad like `strncpy`. If
this NUL-padding behavior is _required_ we should use `strscpy_pad`
instead of `strscpy`.

Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1]
Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2]
Link: https://github.com/KSPP/linux/issues/90
Cc: linux-hardening@vger.kernel.org
Signed-off-by: Justin Stitt <justinstitt@google.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/habanalabs_drv.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 7e66f623f350..5db9af7e2daf 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -460,14 +460,14 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
 	hdev->pdev = pdev;
 
 	/* Assign status description string */
-	strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
-	strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
-	strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
-	strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
-	strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
-					"in device creation", HL_STR_MAX);
-	strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
-					"in reset after device release", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
+				"in device creation", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
+				"in reset after device release", HL_STR_MAX);
 
 
 	/* First, we must find out which ASIC are we handling. This is needed

From 10d260f655c1af1b5a5e7d0cea001e3d0461aeaa Mon Sep 17 00:00:00 2001
From: Benjamin Dotan <bdotan@habana.ai>
Date: Wed, 26 Jul 2023 07:58:03 +0300
Subject: [PATCH 37/77] accel/habanalabs: improve etf configuration

coresight ETF blocks have different size. As a result, sync packets
need to be aligned based on fifo size.

Signed-off-by: Benjamin Dotan <bdotan@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
index 32e0f1a85b35..14a855cdc96b 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
@@ -2125,10 +2125,17 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
 		if (!input)
 			return -EINVAL;
 
+		val = RREG32(base_reg + mmETF_RSZ_OFFSET) << 2;
+		if (val) {
+			val = ffs(val);
+			WREG32(base_reg + mmETF_PSCR_OFFSET, val);
+		} else {
+			WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
+		}
+
 		WREG32(base_reg + mmETF_BUFWM_OFFSET, 0x3FFC);
 		WREG32(base_reg + mmETF_MODE_OFFSET, input->sink_mode);
 		WREG32(base_reg + mmETF_FFCR_OFFSET, 0x4001);
-		WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
 		WREG32(base_reg + mmETF_CTL_OFFSET, 1);
 	} else {
 		WREG32(base_reg + mmETF_BUFWM_OFFSET, 0);

From d261b0ab131e2511b70f7bc4a3737d3b90ca6e87 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Tue, 25 Jul 2023 21:11:56 +0300
Subject: [PATCH 38/77] accel/habanalabs/gaudi2: include block id in ECC error
 reporting

During ECC event handling, Memory wrapper id was mistakenly
printed as block id. Fix the print and in addition fetch the actual
block-id from firmware.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c      | 23 +++++++++++++++----
 .../habanalabs/include/common/cpucp_if.h      |  3 ++-
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index d60389b6700f..dca19be42d5f 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7834,16 +7834,29 @@ static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 		struct hl_eq_ecc_data *ecc_data)
 {
-	u64 ecc_address = 0, ecc_syndrom = 0;
+	u64 ecc_address = 0, ecc_syndrome = 0;
 	u8 memory_wrapper_idx = 0;
+	bool has_block_id = false;
+	u16 block_id;
+
+	if (!hl_is_fw_sw_ver_below(hdev, 1, 12))
+		has_block_id = true;
 
 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
-	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
+	ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom);
 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
 
-	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
-		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.",
-		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
+	if (has_block_id) {
+		block_id = le16_to_cpu(ecc_data->block_id);
+		gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
+			"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.",
+			ecc_address, ecc_syndrome, memory_wrapper_idx, block_id,
+			ecc_data->is_critical);
+	} else {
+		gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
+			"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.",
+			ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical);
+	}
 
 	return !!ecc_data->is_critical;
 }
diff --git a/drivers/accel/habanalabs/include/common/cpucp_if.h b/drivers/accel/habanalabs/include/common/cpucp_if.h
index 33807b839c37..ef7d32224066 100644
--- a/drivers/accel/habanalabs/include/common/cpucp_if.h
+++ b/drivers/accel/habanalabs/include/common/cpucp_if.h
@@ -69,7 +69,8 @@ struct hl_eq_ecc_data {
 	__le64 ecc_syndrom;
 	__u8 memory_wrapper_idx;
 	__u8 is_critical;
-	__u8 pad[6];
+	__le16 block_id;
+	__u8 pad[4];
 };
 
 enum hl_sm_sei_cause {

From 2b76129c5ae710423cfb55806803341af6a403a7 Mon Sep 17 00:00:00 2001
From: David Meriin <dmeriin@habana.ai>
Date: Mon, 24 Jul 2023 23:30:44 +0300
Subject: [PATCH 39/77] accel/habanalabs: move cpucp interface to
 linux/habanalabs

The CPUCP interface is moved to a shared folder outside of accel as
a pre-requisite to upstream the NIC drivers that will also include
this file.

Signed-off-by: David Meriin <dmeriin@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 MAINTAINERS                                         |  1 +
 drivers/accel/habanalabs/common/firmware_if.c       |  2 +-
 drivers/accel/habanalabs/common/habanalabs.h        |  2 +-
 drivers/accel/habanalabs/gaudi/gaudiP.h             |  2 +-
 drivers/accel/habanalabs/gaudi2/gaudi2P.h           |  2 +-
 drivers/accel/habanalabs/goya/goyaP.h               |  2 +-
 .../common => include/linux/habanalabs}/cpucp_if.h  | 13 +++++++++----
 .../linux/habanalabs}/hl_boot_if.h                  |  0
 8 files changed, 15 insertions(+), 9 deletions(-)
 rename {drivers/accel/habanalabs/include/common => include/linux/habanalabs}/cpucp_if.h (99%)
 rename {drivers/accel/habanalabs/include/common => include/linux/habanalabs}/hl_boot_if.h (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index e05506ea8917..c9ff335d2c8c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9071,6 +9071,7 @@ T:	git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git
 F:	Documentation/ABI/testing/debugfs-driver-habanalabs
 F:	Documentation/ABI/testing/sysfs-driver-habanalabs
 F:	drivers/accel/habanalabs/
+F:	include/linux/habanalabs/
 F:	include/trace/events/habanalabs.h
 F:	include/uapi/drm/habanalabs_accel.h
 
diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 2bc775d29854..2a6dfea3d27d 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -6,7 +6,7 @@
  */
 
 #include "habanalabs.h"
-#include "../include/common/hl_boot_if.h"
+#include <linux/habanalabs/hl_boot_if.h>
 
 #include <linux/firmware.h>
 #include <linux/crc32.h>
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index efb046370f2e..8b5fd2b92676 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -8,7 +8,7 @@
 #ifndef HABANALABSP_H_
 #define HABANALABSP_H_
 
-#include "../include/common/cpucp_if.h"
+#include <linux/habanalabs/cpucp_if.h>
 #include "../include/common/qman_if.h"
 #include "../include/hw_ip/mmu/mmu_general.h"
 #include <uapi/drm/habanalabs_accel.h>
diff --git a/drivers/accel/habanalabs/gaudi/gaudiP.h b/drivers/accel/habanalabs/gaudi/gaudiP.h
index b8fa724be5a1..831be53bb9d7 100644
--- a/drivers/accel/habanalabs/gaudi/gaudiP.h
+++ b/drivers/accel/habanalabs/gaudi/gaudiP.h
@@ -10,7 +10,7 @@
 
 #include <uapi/drm/habanalabs_accel.h>
 #include "../common/habanalabs.h"
-#include "../include/common/hl_boot_if.h"
+#include <linux/habanalabs/hl_boot_if.h>
 #include "../include/gaudi/gaudi_packets.h"
 #include "../include/gaudi/gaudi.h"
 #include "../include/gaudi/gaudi_async_events.h"
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
index 5f3ce086928e..4535aa5ab561 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
@@ -10,7 +10,7 @@
 
 #include <uapi/drm/habanalabs_accel.h>
 #include "../common/habanalabs.h"
-#include "../include/common/hl_boot_if.h"
+#include <linux/habanalabs/hl_boot_if.h>
 #include "../include/gaudi2/gaudi2.h"
 #include "../include/gaudi2/gaudi2_packets.h"
 #include "../include/gaudi2/gaudi2_fw_if.h"
diff --git a/drivers/accel/habanalabs/goya/goyaP.h b/drivers/accel/habanalabs/goya/goyaP.h
index 5df3d30b91fd..194c2ae157cd 100644
--- a/drivers/accel/habanalabs/goya/goyaP.h
+++ b/drivers/accel/habanalabs/goya/goyaP.h
@@ -9,8 +9,8 @@
 #define GOYAP_H_
 
 #include <uapi/drm/habanalabs_accel.h>
+#include <linux/habanalabs/hl_boot_if.h>
 #include "../common/habanalabs.h"
-#include "../include/common/hl_boot_if.h"
 #include "../include/goya/goya_packets.h"
 #include "../include/goya/goya.h"
 #include "../include/goya/goya_async_events.h"
diff --git a/drivers/accel/habanalabs/include/common/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
similarity index 99%
rename from drivers/accel/habanalabs/include/common/cpucp_if.h
rename to include/linux/habanalabs/cpucp_if.h
index ef7d32224066..4cdedb603ecb 100644
--- a/drivers/accel/habanalabs/include/common/cpucp_if.h
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -668,7 +668,11 @@ enum pq_init_status {
  *       Packet to register interrupts indicating LKD is ready to receive events from FW.
  *
  * CPUCP_PACKET_SOFT_RESET -
- *	 Packet to perform soft-reset.
+ *      Packet to perform soft-reset.
+ *
+ * CPUCP_PACKET_INTS_REGISTER -
+ *       Packet to inform FW that queues have been established and LKD is ready to receive
+ *       EQ events.
  */
 
 enum cpucp_packet_id {
@@ -734,9 +738,10 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_RESERVED10,		/* not used */
 	CPUCP_PACKET_RESERVED11,		/* not used */
 	CPUCP_PACKET_RESERVED12,		/* internal */
-	CPUCP_PACKET_REGISTER_INTERRUPTS,	/* internal */
-	CPUCP_PACKET_SOFT_RESET,		/* internal */
-	CPUCP_PACKET_ID_MAX			/* must be last */
+	CPUCP_PACKET_RESERVED13,                /* internal */
+	CPUCP_PACKET_SOFT_RESET,                /* internal */
+	CPUCP_PACKET_INTS_REGISTER,             /* internal */
+	CPUCP_PACKET_ID_MAX                     /* must be last */
 };
 
 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
diff --git a/drivers/accel/habanalabs/include/common/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h
similarity index 100%
rename from drivers/accel/habanalabs/include/common/hl_boot_if.h
rename to include/linux/habanalabs/hl_boot_if.h

From ab574f6a81dd7f4371e573e66fbd6f236ca0fd38 Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dhirschfeld@habana.ai>
Date: Sun, 25 Jun 2023 14:11:05 +0300
Subject: [PATCH 40/77] accel/habanalabs: disable events ioctls on control
 device

Because it is not used and also, for graceful reset to work
those ioctls should run on the compute device.

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c           | 13 -------------
 drivers/accel/habanalabs/common/habanalabs_drv.c   |  1 -
 drivers/accel/habanalabs/common/habanalabs_ioctl.c | 11 +++++++++++
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index c0c9e9504672..d9531e434f60 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -572,11 +572,6 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
 	list_del(&hpriv->dev_node);
 	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
 out:
-	/* release the eventfd */
-	if (hpriv->notifier_event.eventfd)
-		eventfd_ctx_put(hpriv->notifier_event.eventfd);
-
-	mutex_destroy(&hpriv->notifier_event.lock);
 	put_pid(hpriv->taskpid);
 
 	kfree(hpriv);
@@ -1995,14 +1990,6 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
 		hl_notifier_event_send(&hpriv->notifier_event, event_mask);
 
 	mutex_unlock(&hdev->fpriv_list_lock);
-
-	/* control device */
-	mutex_lock(&hdev->fpriv_ctrl_list_lock);
-
-	list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
-		hl_notifier_event_send(&hpriv->notifier_event, event_mask);
-
-	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
 }
 
 /*
diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 5db9af7e2daf..306a5bc9bf89 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -296,7 +296,6 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 	hpriv->hdev = hdev;
 	filp->private_data = hpriv;
 
-	mutex_init(&hpriv->notifier_event.lock);
 	nonseekable_open(inode, filp);
 
 	hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 87a6a0c0c48a..8c3f1e2de2fe 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -1104,6 +1104,17 @@ int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_pri
 
 static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
 {
+	struct hl_info_args *args = data;
+
+	switch (args->op) {
+	case HL_INFO_GET_EVENTS:
+	case HL_INFO_UNREGISTER_EVENTFD:
+	case HL_INFO_REGISTER_EVENTFD:
+		return -EOPNOTSUPP;
+	default:
+		break;
+	}
+
 	return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
 }
 

From e0f452802bdaaf45dcb24080053f2b3a9c7c464a Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dhirschfeld@habana.ai>
Date: Sun, 25 Jun 2023 15:51:42 +0300
Subject: [PATCH 41/77] accel/habanalabs: fix inline doc typos

Fix two typos

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c     | 2 +-
 drivers/accel/habanalabs/common/habanalabs.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index d9531e434f60..e217ee6d1768 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -639,7 +639,7 @@ static void device_release_func(struct device *dev)
  * @hdev: pointer to habanalabs device structure
  * @class: pointer to the class object of the device
  * @minor: minor number of the specific device
- * @fpos: file operations to install for this device
+ * @fops: file operations to install for this device
  * @name: name of the device as it will appear in the filesystem
  * @cdev: pointer to the char device object that will be initialized
  * @dev: pointer to the device object that will be initialized
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 8b5fd2b92676..f8c597903cac 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -2261,7 +2261,7 @@ struct hl_notifier_event {
 /**
  * struct hl_fpriv - process information stored in FD private data.
  * @hdev: habanalabs device structure.
- * @filp: pointer to the DRM file private data structure.
+ * @file_priv: pointer to the DRM file private data structure.
  * @taskpid: current process ID.
  * @ctx: current executing context. TODO: remove for multiple ctx per process
  * @ctx_mgr: context manager to handle multiple context for this FD.

From 0648c4d0806fe167fe699299573100507ae99502 Mon Sep 17 00:00:00 2001
From: Hen Alon <halon@habana.ai>
Date: Wed, 9 Aug 2023 17:29:41 +0300
Subject: [PATCH 42/77] accel/habanalabs: add tsc clock sampling to clock sync
 info

Add tsc clock to clock sync info, to enable using this clock for
sampling and sync it with device time.

Signed-off-by: Hen Alon <halon@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/habanalabs_ioctl.c | 3 +++
 include/uapi/drm/habanalabs_accel.h                | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 8c3f1e2de2fe..8ef36effb95b 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -17,6 +17,8 @@
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 
+#include <asm/msr.h>
+
 static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
 	[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
 	[HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
@@ -320,6 +322,7 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
 
 	time_sync.device_time = hdev->asic_funcs->get_device_time(hdev);
 	time_sync.host_time = ktime_get_raw_ns();
+	time_sync.tsc_time = rdtsc();
 
 	return copy_to_user(out, &time_sync,
 		min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h
index e7893b082bf8..dfe47db24ae6 100644
--- a/include/uapi/drm/habanalabs_accel.h
+++ b/include/uapi/drm/habanalabs_accel.h
@@ -991,6 +991,7 @@ struct hl_info_reset_count {
 struct hl_info_time_sync {
 	__u64 device_time;
 	__u64 host_time;
+	__u64 tsc_time;
 };
 
 /**

From 72bff371b2e26a0088d4cb1b44aee20c77e423ba Mon Sep 17 00:00:00 2001
From: Moti Haimovski <mhaimovski@habana.ai>
Date: Tue, 22 Aug 2023 15:10:55 +0300
Subject: [PATCH 43/77] accel/habanalabs/gaudi2: print power-mode changes

Print to kernel log any device power mode changes events reported by
the FW.

Signed-off-by: Moti Haimovski <mhaimovski@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c             | 12 ++++++++++++
 .../habanalabs/include/gaudi2/gaudi2_async_events.h  |  7 +++++++
 .../include/gaudi2/gaudi2_async_ids_map_extended.h   | 12 ++++++++++++
 3 files changed, 31 insertions(+)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index dca19be42d5f..677900e18519 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7798,6 +7798,7 @@ static inline bool is_info_event(u32 event)
 	switch (event) {
 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
+	case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3:
 
 	/* return in case of NIC status event - these events are received periodically and not as
 	 * an indication to an error.
@@ -10178,6 +10179,17 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		is_critical = true;
 		break;
 
+	case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY:
+	case GAUDI2_EVENT_ARC_PWR_BRK_EXT:
+	case GAUDI2_EVENT_ARC_PWR_RD_MODE0:
+	case GAUDI2_EVENT_ARC_PWR_RD_MODE1:
+	case GAUDI2_EVENT_ARC_PWR_RD_MODE2:
+	case GAUDI2_EVENT_ARC_PWR_RD_MODE3:
+		error_count = GAUDI2_NA_EVENT_CAUSE;
+		dev_info_ratelimited(hdev->dev, "%s event received\n",
+					gaudi2_irq_map_table[event_type].name);
+		break;
+
 	default:
 		if (gaudi2_irq_map_table[event_type].valid) {
 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h
index f661068d0c5f..a426410139af 100644
--- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h
+++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h
@@ -959,6 +959,13 @@ enum gaudi2_async_event_id {
 	GAUDI2_EVENT_ARC_DCCM_FULL = 1319,
 	GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320,
 	GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321,
+	GAUDI2_EVENT_ARC_PWR_BRK_ENTRY = 1322,
+	GAUDI2_EVENT_ARC_PWR_BRK_EXT = 1323,
+	GAUDI2_EVENT_ARC_PWR_RD_MODE0 = 1324,
+	GAUDI2_EVENT_ARC_PWR_RD_MODE1 = 1325,
+	GAUDI2_EVENT_ARC_PWR_RD_MODE2 = 1326,
+	GAUDI2_EVENT_ARC_PWR_RD_MODE3 = 1327,
+	GAUDI2_EVENT_ARC_EQ_HEARTBEAT = 1328,
 	GAUDI2_EVENT_SIZE,
 };
 
diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
index ad01fc4e9940..6cb0f615fc3e 100644
--- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
+++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
@@ -2673,6 +2673,18 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
 		 .name = "FP32_NOT_SUPPORTED" },
 	{ .fc_id = 1321, .cpu_id = 627, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD,
 		 .name = "DEV_RESET_REQ" },
+	{ .fc_id = 1322, .cpu_id = 628, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "PWR_BRK_ENTRY" },
+	{ .fc_id = 1323, .cpu_id = 629, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "PWR_BRK_EXT" },
+	{ .fc_id = 1324, .cpu_id = 630, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "PWR_RD_MODE0" },
+	{ .fc_id = 1325, .cpu_id = 631, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "PWR_RD_MODE1" },
+	{ .fc_id = 1326, .cpu_id = 632, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "PWR_RD_MODE2" },
+	{ .fc_id = 1327, .cpu_id = 633, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "PWR_RD_MODE3" },
 };
 
 #endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */

From 7c4130e6ddd709be2033a6635c91d445cb2baea5 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Tue, 8 Aug 2023 12:56:47 +0300
Subject: [PATCH 44/77] accel/habanalabs/gaudi2: handle eq health heartbeat
 check

Add mechanism for fw eq health check. this will be done using two flows:
using the heartbeat mechanism and raising a dedicated interrupt to
indicate an eq failure like EQ full.
This patch will add implementation for the eq heartbeat for gaudi2 asic.

More info about the heartbeat mechanism:
Expand the heartbeat mechanism to monitor a new event that
will be sent from FW upon receiving heartbeat message.
that way driver can know that the eq is working or not.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c      | 37 ++++++++++++++++++-
 drivers/accel/habanalabs/common/habanalabs.h  |  2 +
 drivers/accel/habanalabs/gaudi2/gaudi2.c      | 10 +++++
 .../gaudi2/gaudi2_async_ids_map_extended.h    | 14 ++++---
 include/linux/habanalabs/cpucp_if.h           | 14 ++++++-
 5 files changed, 68 insertions(+), 9 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index e217ee6d1768..1d1ccd8d5c75 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -989,6 +989,25 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
 	return (vendor_id == PCI_VENDOR_ID_HABANALABS);
 }
 
+static void hl_device_eq_heartbeat(struct hl_device *hdev)
+{
+	u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+	 /*
+	  * This feature supported in FW version 1.12.0 45.2.0 and above,
+	  * only on those FW versions eq_health_check_supported will be set.
+	  * Start checking eq health only after driver has enabled events from FW.
+	  */
+	if (!prop->cpucp_info.eq_health_check_supported || !hdev->init_done)
+		return;
+
+	if (hdev->eq_heartbeat_received)
+		hdev->eq_heartbeat_received = false;
+	else
+		hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
+}
+
 static void hl_device_heartbeat(struct work_struct *work)
 {
 	struct hl_device *hdev = container_of(work, struct hl_device,
@@ -999,6 +1018,12 @@ static void hl_device_heartbeat(struct work_struct *work)
 	if (!hl_device_operational(hdev, NULL))
 		goto reschedule;
 
+	/*
+	 * For EQ health check need to check if driver received the heartbeat eq event
+	 * in order to validate the eq is working.
+	 */
+	hl_device_eq_heartbeat(hdev);
+
 	if (!hdev->asic_funcs->send_heartbeat(hdev))
 		goto reschedule;
 
@@ -1055,7 +1080,15 @@ static int device_late_init(struct hl_device *hdev)
 	hdev->high_pll = hdev->asic_prop.high_pll;
 
 	if (hdev->heartbeat) {
+		/*
+		 * Before scheduling the heartbeat driver will check if eq event has received.
+		 * for the first schedule we need to set the indication as true then for the next
+		 * one this indication will be true only if eq event was sent by FW.
+		 */
+		hdev->eq_heartbeat_received = true;
+
 		INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
+
 		schedule_delayed_work(&hdev->work_heartbeat,
 				usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
 	}
@@ -2235,8 +2268,6 @@ int hl_device_init(struct hl_device *hdev)
 		"Successfully added device %s to habanalabs driver\n",
 		dev_name(&(hdev)->pdev->dev));
 
-	hdev->init_done = true;
-
 	/* After initialization is done, we are ready to receive events from
 	 * the F/W. We can't do it before because we will ignore events and if
 	 * those events are fatal, we won't know about it and the device will
@@ -2244,6 +2275,8 @@ int hl_device_init(struct hl_device *hdev)
 	 */
 	hdev->asic_funcs->enable_events_from_fw(hdev);
 
+	hdev->init_done = true;
+
 	return 0;
 
 cb_pool_fini:
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index f8c597903cac..e5b416852996 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -3314,6 +3314,7 @@ struct hl_reset_info {
  *                             device.
  * @supports_ctx_switch: true if a ctx switch is required upon first submission.
  * @support_preboot_binning: true if we support read binning info from preboot.
+ * @eq_heartbeat_received: indication that eq heartbeat event has received from FW.
  * @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing.
  * @fw_components: Controls which f/w components to load to the device. There are multiple f/w
  *                 stages and sometimes we want to stop at a certain stage. Used only for testing.
@@ -3474,6 +3475,7 @@ struct hl_device {
 	u8				reset_upon_device_release;
 	u8				supports_ctx_switch;
 	u8				support_preboot_binning;
+	u8				eq_heartbeat_received;
 
 	/* Parameters for bring-up to be upstreamed */
 	u64				nic_ports_mask;
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 677900e18519..e507847bf460 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7804,6 +7804,7 @@ static inline bool is_info_event(u32 event)
 	 * an indication to an error.
 	 */
 	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
+	case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
 		return true;
 	default:
 		return false;
@@ -9765,6 +9766,11 @@ static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
 	return U16_MAX;
 }
 
+static void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
+{
+	hdev->eq_heartbeat_received = true;
+}
+
 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 {
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
@@ -10190,6 +10196,10 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 					gaudi2_irq_map_table[event_type].name);
 		break;
 
+	case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
+		hl_eq_heartbeat_event_handle(hdev);
+		error_count = GAUDI2_NA_EVENT_CAUSE;
+		break;
 	default:
 		if (gaudi2_irq_map_table[event_type].valid) {
 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
index 6cb0f615fc3e..57e661771b6c 100644
--- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
+++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
@@ -2674,17 +2674,19 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
 	{ .fc_id = 1321, .cpu_id = 627, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD,
 		 .name = "DEV_RESET_REQ" },
 	{ .fc_id = 1322, .cpu_id = 628, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
-		 .name = "PWR_BRK_ENTRY" },
+		 .name = "ARC_PWR_BRK_ENTRY" },
 	{ .fc_id = 1323, .cpu_id = 629, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
-		 .name = "PWR_BRK_EXT" },
+		 .name = "ARC_PWR_BRK_EXT" },
 	{ .fc_id = 1324, .cpu_id = 630, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
-		 .name = "PWR_RD_MODE0" },
+		 .name = "ARC_PWR_RD_MODE0" },
 	{ .fc_id = 1325, .cpu_id = 631, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
-		 .name = "PWR_RD_MODE1" },
+		 .name = "ARC_PWR_RD_MODE1" },
 	{ .fc_id = 1326, .cpu_id = 632, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
-		 .name = "PWR_RD_MODE2" },
+		 .name = "ARC_PWR_RD_MODE2" },
 	{ .fc_id = 1327, .cpu_id = 633, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
-		 .name = "PWR_RD_MODE3" },
+		 .name = "ARC_PWR_RD_MODE3" },
+	{ .fc_id = 1328, .cpu_id = 634, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "ARC_EQ_HEARTBEAT" },
 };
 
 #endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */
diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
index 4cdedb603ecb..a18fa81aad1f 100644
--- a/include/linux/habanalabs/cpucp_if.h
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -33,6 +33,17 @@
 #define PLL_MAP_MAX_BITS	128
 #define PLL_MAP_LEN		(PLL_MAP_MAX_BITS / 8)
 
+enum eq_event_id {
+	EQ_EVENT_NIC_STS_REQUEST = 0,
+	EQ_EVENT_PWR_MODE_0,
+	EQ_EVENT_PWR_MODE_1,
+	EQ_EVENT_PWR_MODE_2,
+	EQ_EVENT_PWR_MODE_3,
+	EQ_EVENT_PWR_BRK_ENTRY,
+	EQ_EVENT_PWR_BRK_EXIT,
+	EQ_EVENT_HEARTBEAT,
+};
+
 /*
  * info of the pkt queue pointers in the first async occurrence
  */
@@ -1143,6 +1154,7 @@ struct cpucp_security_info {
  *                     (0 = functional 1 = binned)
  * @interposer_version: Interposer version programmed in eFuse
  * @substrate_version: Substrate version programmed in eFuse
+ * @eq_health_check_supported: eq health check feature supported in FW.
  * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM.
  * @fw_os_version: Firmware OS Version
  */
@@ -1169,7 +1181,7 @@ struct cpucp_info {
 	__u8 xbar_binning_mask;
 	__u8 interposer_version;
 	__u8 substrate_version;
-	__u8 reserved2;
+	__u8 eq_health_check_supported;
 	struct cpucp_security_info sec_info;
 	__le32 fw_hbm_region_size;
 	__u8 pll_map[PLL_MAP_LEN];

From 764bfd138f359423b299b7bf3fcbabb56b981ef5 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Wed, 23 Aug 2023 12:36:25 +0300
Subject: [PATCH 45/77] accel/habanalabs/gaudi2: add eq health check using irq

This is the second patch for applying the eq health check mechanism
which will add support for the interrupt flow for gaudi2 asic.

More info about the interrupt mechanism:
set a dedicated msix for the eq error interrupt, and add
interrupt handler for it.
when FW detects some issue with EQ like EQ_FULL, it'll
raise that interrupt and driver should reset the device.
Driver will inform the FW which msix index to use through
the already existing handshake mechanism which will
send msix info message to fw.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/habanalabs.h |  1 +
 drivers/accel/habanalabs/common/irq.c        | 12 ++++++++++++
 drivers/accel/habanalabs/gaudi2/gaudi2.c     | 16 ++++++++++++++++
 drivers/accel/habanalabs/gaudi2/gaudi2P.h    |  1 +
 include/linux/habanalabs/cpucp_if.h          |  1 +
 5 files changed, 31 insertions(+)

diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index e5b416852996..6f2cbd3c2e95 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -3689,6 +3689,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg);
 irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg);
 irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg);
 irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg);
+irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg);
 u32 hl_cq_inc_ptr(u32 ptr);
 
 int hl_asid_init(struct hl_device *hdev);
diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c
index 10ac100bf9e2..f6b6c54bc868 100644
--- a/drivers/accel/habanalabs/common/irq.c
+++ b/drivers/accel/habanalabs/common/irq.c
@@ -401,6 +401,18 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg)
+{
+	u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
+	struct hl_device *hdev = arg;
+
+	dev_err(hdev->dev, "EQ error interrupt received\n");
+
+	hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
+
+	return IRQ_HANDLED;
+}
+
 /**
  * hl_irq_handler_eq - irq handler for event queue
  *
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index e507847bf460..b0ba62b691ec 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -4175,6 +4175,8 @@ static const char *gaudi2_irq_name(u16 irq_number)
 		return "gaudi2 unexpected error";
 	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
 		return "gaudi2 user completion";
+	case GAUDI2_IRQ_NUM_EQ_ERROR:
+		return "gaudi2 eq error";
 	default:
 		return "invalid";
 	}
@@ -4317,6 +4319,15 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
 		}
 	}
 
+	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
+	rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler,
+					IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR),
+					hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
+		goto free_user_irq;
+	}
+
 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
 
 	return 0;
@@ -4376,6 +4387,7 @@ static void gaudi2_sync_irqs(struct hl_device *hdev)
 	}
 
 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
+	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR));
 }
 
 static void gaudi2_disable_msix(struct hl_device *hdev)
@@ -4412,6 +4424,9 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
 	free_irq(irq, cq);
 
+	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
+	free_irq(irq, hdev);
+
 	pci_free_irq_vectors(hdev->pdev);
 
 	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
@@ -11345,6 +11360,7 @@ static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64
 static void gaudi2_get_msi_info(__le32 *table)
 {
 	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
+	table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR);
 }
 
 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
index 4535aa5ab561..14e281fd9895 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
@@ -419,6 +419,7 @@ enum gaudi2_irq_num {
 	GAUDI2_IRQ_NUM_NIC_PORT_FIRST,
 	GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
 	GAUDI2_IRQ_NUM_TPC_ASSERT,
+	GAUDI2_IRQ_NUM_EQ_ERROR,
 	GAUDI2_IRQ_NUM_RESERVED_FIRST,
 	GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1),
 	GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT,
diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
index a18fa81aad1f..84d74c4ee4d3 100644
--- a/include/linux/habanalabs/cpucp_if.h
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -1004,6 +1004,7 @@ enum cpucp_msi_type {
 	CPUCP_NIC_PORT5_MSI_TYPE,
 	CPUCP_NIC_PORT7_MSI_TYPE,
 	CPUCP_NIC_PORT9_MSI_TYPE,
+	CPUCP_EVENT_QUEUE_ERR_MSI_TYPE,
 	CPUCP_NUM_OF_MSI_TYPES
 };
 

From 051868d93cfd342b4ff8e6297b93a6a43dbe81b3 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Sun, 27 Aug 2023 19:01:20 +0300
Subject: [PATCH 46/77] accel/habanalabs: prevent sending heartbeat before
 events are enabled

After the heartbeat mechanism is now expanded to be used also
for EQ health check, we shouldn't send heartbeat messages
to FW before driver allow events to be received from FW.

Because if the driver will send two heartbeats before it enables
events to be received from FW, then the EQ health check
will fail and reset the device.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 1d1ccd8d5c75..0c9ba09c1b75 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -994,12 +994,7 @@ static void hl_device_eq_heartbeat(struct hl_device *hdev)
 	u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 
-	 /*
-	  * This feature supported in FW version 1.12.0 45.2.0 and above,
-	  * only on those FW versions eq_health_check_supported will be set.
-	  * Start checking eq health only after driver has enabled events from FW.
-	  */
-	if (!prop->cpucp_info.eq_health_check_supported || !hdev->init_done)
+	if (!prop->cpucp_info.eq_health_check_supported)
 		return;
 
 	if (hdev->eq_heartbeat_received)
@@ -1015,7 +1010,8 @@ static void hl_device_heartbeat(struct work_struct *work)
 	struct hl_info_fw_err_info info = {0};
 	u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
 
-	if (!hl_device_operational(hdev, NULL))
+	/* Start heartbeat checks only after driver has enabled events from FW */
+	if (!hl_device_operational(hdev, NULL) || !hdev->init_done)
 		goto reschedule;
 
 	/*

From dfdbc55a9c8c5ab00ae2d5963ac1199fdc9da2d3 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 4 Aug 2023 17:12:58 +0300
Subject: [PATCH 47/77] accel/habanalabs: always pass exported size to
 alloc_sgt_from_device_pages()

For Gaudi1 the exported dma-buf is always composed of a single page, and
therefore the exported size is equal to this page's size.
When calling alloc_sgt_from_device_pages(), we pass 0 as the exported
size and internally calculate it as "number of pages * page size".
This makes alloc_sgt_from_device_pages() less clear, because the
exported size parameter is not understood as a restriction on the pages'
size.
Modify to always pass the exported size explicitly.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/memory.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 1b1b4256b011..c7e49cb383cf 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -1561,8 +1561,7 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 	if (!sgt)
 		return ERR_PTR(-ENOMEM);
 
-	/* remove export size restrictions in case not explicitly defined */
-	cur_size_to_export = exported_size ? exported_size : (npages * page_size);
+	cur_size_to_export = exported_size;
 
 	/* If the size of each page is larger than the dma max segment size,
 	 * then we can't combine pages and the number of entries in the SGL
@@ -1728,7 +1727,7 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 						&hl_dmabuf->device_address,
 						1,
 						hl_dmabuf->dmabuf->size,
-						0,
+						hl_dmabuf->dmabuf->size,
 						attachment->dev,
 						dir);
 

From efbca048c64bb00562c3e980dfc6ca47fff4fa0e Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 4 Aug 2023 17:42:13 +0300
Subject: [PATCH 48/77] accel/habanalabs: use exported size from dma_buf and
 not from phys_pg_pack

The 'exported_size' member in 'struct hl_vm_phys_pg_pack' is used to
keep the exported dma-buf size, to be later used when the buffer is
mapped.
However it is possible that the same phys_pg_pack will be exported more
than once, and independently of when the mapping takes place.
Remove this member from the phys_pg_pack structure, and simply use the
size in the dma-buf object as the exported size when mapping.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/habanalabs.h | 2 --
 drivers/accel/habanalabs/common/memory.c     | 3 +--
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 6f2cbd3c2e95..4c5d55c9109d 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -2159,7 +2159,6 @@ struct hl_vm_hw_block_list_node {
  * @pages: the physical page array.
  * @npages: num physical pages in the pack.
  * @total_size: total size of all the pages in this list.
- * @exported_size: buffer exported size.
  * @node: used to attach to deletion list that is used when all the allocations are cleared
  *        at the teardown of the context.
  * @mapping_cnt: number of shared mappings.
@@ -2176,7 +2175,6 @@ struct hl_vm_phys_pg_pack {
 	u64			*pages;
 	u64			npages;
 	u64			total_size;
-	u64			exported_size;
 	struct list_head	node;
 	atomic_t		mapping_cnt;
 	u32			asid;
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index c7e49cb383cf..27ab176c55c1 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -1719,7 +1719,7 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 						phys_pg_pack->pages,
 						phys_pg_pack->npages,
 						phys_pg_pack->page_size,
-						phys_pg_pack->exported_size,
+						hl_dmabuf->dmabuf->size,
 						attachment->dev,
 						dir);
 	else
@@ -2044,7 +2044,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 		if (rc)
 			goto dec_memhash_export_cnt;
 
-		phys_pg_pack->exported_size = size;
 		hl_dmabuf->phys_pg_pack = phys_pg_pack;
 		hl_dmabuf->memhash_hnode = hnode;
 	} else {

From 0b75cb5b240fddf181c284d415ee77ef61b418d6 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 9 Aug 2023 16:14:49 +0300
Subject: [PATCH 49/77] accel/habanalabs: export dma-buf only if size/offset
 multiples of PAGE_SIZE

It is currently allowed for a user to export dma-buf with size and
offset that are not multiples of PAGE_SIZE.
The exported memory is mapped for the importer device, and there it will
be rounded to PAGE_SIZE, leading to actually exporting more than the
user intended to.
To make the user be aware of it, accept only size and offset which are
multiple of PAGE_SIZE.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/memory.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 27ab176c55c1..b4a9ff692ebc 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -1877,16 +1877,16 @@ static int export_dmabuf(struct hl_ctx *ctx,
 
 static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size)
 {
-	if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
+	if (!PAGE_ALIGNED(device_addr)) {
 		dev_dbg(hdev->dev,
-			"exported device memory address 0x%llx should be aligned to 0x%lx\n",
+			"exported device memory address 0x%llx should be aligned to PAGE_SIZE 0x%lx\n",
 			device_addr, PAGE_SIZE);
 		return -EINVAL;
 	}
 
-	if (size < PAGE_SIZE) {
+	if (!size || !PAGE_ALIGNED(size)) {
 		dev_dbg(hdev->dev,
-			"exported device memory size %llu should be equal to or greater than %lu\n",
+			"exported device memory size %llu should be a multiple of PAGE_SIZE %lu\n",
 			size, PAGE_SIZE);
 		return -EINVAL;
 	}
@@ -1937,6 +1937,13 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s
 	if (rc)
 		return rc;
 
+	if (!PAGE_ALIGNED(offset)) {
+		dev_dbg(hdev->dev,
+			"exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n",
+			offset, PAGE_SIZE);
+		return -EINVAL;
+	}
+
 	if ((offset + size) > phys_pg_pack->total_size) {
 		dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n",
 				offset, size, phys_pg_pack->total_size);

From d89d329a2bb39e0e0ad37d7a40302d0fa7e8851a Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 14 Aug 2023 18:01:21 +0300
Subject: [PATCH 50/77] accel/habanalabs: tiny refactor of hl_map_dmabuf()

alloc_sgt_from_device_pages() includes relatively many parameters, and
in a subsequent change another offset parameter is going to be added.
Using structure fields directly when calling this function, and in
hl_map_dmabuf() it is done twice, makes it a little bit difficult to
understand the meaning of the parameters.
To make it clearer, assign the required values into local variables with
explicit names, and use the variables when calling the function.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/memory.c | 32 +++++++++++-------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index b4a9ff692ebc..c09c066a0db4 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -1699,6 +1699,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf,
 static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 					enum dma_data_direction dir)
 {
+	u64 *pages, npages, page_size, exported_size;
 	struct dma_buf *dma_buf = attachment->dmabuf;
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct hl_dmabuf_priv *hl_dmabuf;
@@ -1707,30 +1708,27 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 
 	hl_dmabuf = dma_buf->priv;
 	hdev = hl_dmabuf->ctx->hdev;
-	phys_pg_pack = hl_dmabuf->phys_pg_pack;
 
 	if (!attachment->peer2peer) {
 		dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
 		return ERR_PTR(-EPERM);
 	}
 
-	if (phys_pg_pack)
-		sgt = alloc_sgt_from_device_pages(hdev,
-						phys_pg_pack->pages,
-						phys_pg_pack->npages,
-						phys_pg_pack->page_size,
-						hl_dmabuf->dmabuf->size,
-						attachment->dev,
-						dir);
-	else
-		sgt = alloc_sgt_from_device_pages(hdev,
-						&hl_dmabuf->device_address,
-						1,
-						hl_dmabuf->dmabuf->size,
-						hl_dmabuf->dmabuf->size,
-						attachment->dev,
-						dir);
+	exported_size = hl_dmabuf->dmabuf->size;
+	phys_pg_pack = hl_dmabuf->phys_pg_pack;
 
+	if (phys_pg_pack) {
+		pages = phys_pg_pack->pages;
+		npages = phys_pg_pack->npages;
+		page_size = phys_pg_pack->page_size;
+	} else {
+		pages = &hl_dmabuf->device_address;
+		npages = 1;
+		page_size = hl_dmabuf->dmabuf->size;
+	}
+
+	sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size,
+						attachment->dev, dir);
 	if (IS_ERR(sgt))
 		dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
 

From 0165994c215f321e2d055368f89b424756e340eb Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Thu, 24 Aug 2023 15:45:21 +0300
Subject: [PATCH 51/77] accel/habanalabs: fix bug in timestamp interrupt
 handling

There is a potential race between user thread seeking to re-use
a timestamp record with new interrupt id, while this record is still
in the middle of interrupt handling and it is about to be freed.
Imagine the driver set the record in_use to 0 and only then fill the
free_node information. This might lead to unpleasant scenario where
the new registration thread detects the record as free to use, and
change the cq buff address. That will cause the free_node to get
the wrong buffer address to put refcount to.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/irq.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c
index f6b6c54bc868..058f27040805 100644
--- a/drivers/accel/habanalabs/common/irq.c
+++ b/drivers/accel/habanalabs/common/irq.c
@@ -259,8 +259,6 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
 	dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
 			pend, pend->ts_reg_info.timestamp_kernel_addr, interrupt_id);
 
-	/* Mark kernel CB node as free */
-	pend->ts_reg_info.in_use = false;
 	list_del(&pend->wait_list_node);
 
 	/* Putting the refcount for ts_buff and cq_cb objects will be handled
@@ -270,6 +268,9 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
 	free_node->cq_cb = pend->ts_reg_info.cq_cb;
 	list_add(&free_node->free_objects_node, *free_list);
 
+	/* Mark TS record as free */
+	pend->ts_reg_info.in_use = false;
+
 	return 0;
 }
 

From 1157b5d6b3b069394e37cd3b1d32c39eb833e546 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Thu, 2 Mar 2023 11:09:24 +0200
Subject: [PATCH 52/77] accel/habanalabs: optimize timestamp registration
 handler

Currently we use dynamic allocation inside the irq handler
in order to allocate free node to be used for the free jobs.

This operation is expensive, especially when we deal with large
burst of events records that get released at the same time.

The alternative is to have pre allocated pool of free nodes
and just fetch nodes from this pool at irq handling time instead
of allocating them.

In case the pool becomes full, then the driver will fallback to
dynamic allocations.

As part of the optimization also update the unregister flow
upon re-using a timestamp record, by making the operation much
simpler and quicker. We already have the record in the registration
flow and now we just seek to re-use with different interrupt.
Therefore, no need to look for buffer according to the user handle.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 85 +++++++------------
 drivers/accel/habanalabs/common/device.c      | 59 ++++++++++++-
 drivers/accel/habanalabs/common/habanalabs.h  | 27 ++++++
 drivers/accel/habanalabs/common/irq.c         | 77 ++++++++++++++---
 4 files changed, 180 insertions(+), 68 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index daaa3cbe0b12..02049bd26356 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -3246,60 +3246,29 @@ static int validate_and_get_ts_record(struct device *dev,
 	return 0;
 }
 
-static int unregister_timestamp_node(struct hl_device *hdev, struct hl_ctx *ctx,
-			struct hl_mem_mgr *mmg, u64 ts_handle, u64 ts_offset,
-			struct hl_user_interrupt *interrupt)
+static void unregister_timestamp_node(struct hl_device *hdev,
+			struct hl_user_pending_interrupt *record, bool need_lock)
 {
-	struct hl_user_pending_interrupt *req_event_record, *pend, *temp_pend;
-	struct hl_mmap_mem_buf *buff;
-	struct hl_ts_buff *ts_buff;
+	struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
 	bool ts_rec_found = false;
-	int rc;
 
-	buff = hl_mmap_mem_buf_get(mmg, ts_handle);
-	if (!buff) {
-		dev_err(hdev->dev, "invalid TS buff handle!\n");
-		return -EINVAL;
+	if (need_lock)
+		spin_lock(&interrupt->wait_list_lock);
+
+	if (record->ts_reg_info.in_use) {
+		record->ts_reg_info.in_use = false;
+		list_del(&record->wait_list_node);
+		ts_rec_found = true;
 	}
 
-	ts_buff = buff->private;
-
-	rc = validate_and_get_ts_record(hdev->dev, ts_buff, ts_offset, &req_event_record);
-	if (rc)
-		goto put_buf;
-
-	/*
-	 * Note: we don't use the ts in_use field here, but we rather scan the list
-	 * because we cannot rely on the user to keep the order of register/unregister calls
-	 * and since we might have races here all the time between the irq and register/unregister
-	 * calls so it safer to lock the list and scan it to find the node.
-	 * If the node found on the list we mark it as not in use and delete it from the list,
-	 * if it's not here then the node was handled already in the irq before we get into
-	 * this ioctl.
-	 */
-	spin_lock(&interrupt->wait_list_lock);
-
-	list_for_each_entry_safe(pend, temp_pend, &interrupt->wait_list_head, wait_list_node) {
-		if (pend == req_event_record) {
-			pend->ts_reg_info.in_use = false;
-			list_del(&pend->wait_list_node);
-			ts_rec_found = true;
-			break;
-		}
-	}
-
-	spin_unlock(&interrupt->wait_list_lock);
+	if (need_lock)
+		spin_unlock(&interrupt->wait_list_lock);
 
 	/* Put refcounts that were taken when we registered the event */
 	if (ts_rec_found) {
-		hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
-		hl_cb_put(pend->ts_reg_info.cq_cb);
+		hl_mmap_mem_buf_put(record->ts_reg_info.buf);
+		hl_cb_put(record->ts_reg_info.cq_cb);
 	}
-
-put_buf:
-	hl_mmap_mem_buf_put(buff);
-
-	return rc;
 }
 
 static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
@@ -3308,6 +3277,7 @@ static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx
 {
 	struct hl_user_pending_interrupt *req_offset_record;
 	struct hl_ts_buff *ts_buff = data->buf->private;
+	bool need_lock = false;
 	int rc;
 
 	rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
@@ -3315,21 +3285,30 @@ static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx
 	if (rc)
 		return rc;
 
-	/* In case the node already registered, need to unregister first then re-use*/
+	/* In case the node already registered, need to unregister first then re-use */
 	if (req_offset_record->ts_reg_info.in_use) {
 		dev_dbg(data->buf->mmg->dev,
-				"Requested ts offset(%llx) is in use, unregister first\n",
-							data->ts_offset);
+				"Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n",
+				req_offset_record,
+				req_offset_record->ts_reg_info.interrupt->interrupt_id,
+				req_offset_record->ts_reg_info.timestamp_kernel_addr,
+				data->interrupt->interrupt_id);
 		/*
 		 * Since interrupt here can be different than the one the node currently registered
-		 * on, and we don't wan't to lock two lists while we're doing unregister, so
+		 * on, and we don't want to lock two lists while we're doing unregister, so
 		 * unlock the new interrupt wait list here and acquire the lock again after you done
 		 */
-		spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
+		if (data->interrupt->interrupt_id !=
+				req_offset_record->ts_reg_info.interrupt->interrupt_id) {
 
-		unregister_timestamp_node(hdev, ctx, data->mmg, data->ts_handle,
-				data->ts_offset, req_offset_record->ts_reg_info.interrupt);
-		spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
+			need_lock = true;
+			spin_unlock(&data->interrupt->wait_list_lock);
+		}
+
+		unregister_timestamp_node(hdev, req_offset_record, need_lock);
+
+		if (need_lock)
+			spin_lock(&data->interrupt->wait_list_lock);
 	}
 
 	/* Fill up the new registration node info and add it to the list */
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 0c9ba09c1b75..8cca4050d276 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -2033,7 +2033,9 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
 int hl_device_init(struct hl_device *hdev)
 {
 	int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
+	struct hl_ts_free_jobs *free_jobs_data;
 	bool expose_interfaces_on_err = false;
+	void *p;
 
 	/* Initialize ASIC function pointers and perform early init */
 	rc = device_early_init(hdev);
@@ -2050,15 +2052,43 @@ int hl_device_init(struct hl_device *hdev)
 			rc = -ENOMEM;
 			goto early_fini;
 		}
+
+		/* Timestamp records supported only if CQ supported in device */
+		if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
+			for (i = 0 ; i < user_interrupt_cnt ; i++) {
+				p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
+						sizeof(struct timestamp_reg_free_node));
+				if (!p) {
+					rc = -ENOMEM;
+					goto free_usr_intr_mem;
+				}
+				free_jobs_data = &hdev->user_interrupt[i].ts_free_jobs_data;
+				free_jobs_data->free_nodes_pool = p;
+				free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
+				free_jobs_data->next_avail_free_node_idx = 0;
+			}
+		}
 	}
 
+	free_jobs_data = &hdev->common_user_cq_interrupt.ts_free_jobs_data;
+	p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
+				sizeof(struct timestamp_reg_free_node));
+	if (!p) {
+		rc = -ENOMEM;
+		goto free_usr_intr_mem;
+	}
+
+	free_jobs_data->free_nodes_pool = p;
+	free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
+	free_jobs_data->next_avail_free_node_idx = 0;
+
 	/*
 	 * Start calling ASIC initialization. First S/W then H/W and finally
 	 * late init
 	 */
 	rc = hdev->asic_funcs->sw_init(hdev);
 	if (rc)
-		goto free_usr_intr_mem;
+		goto free_common_usr_intr_mem;
 
 
 	/* initialize completion structure for multi CS wait */
@@ -2297,8 +2327,17 @@ int hl_device_init(struct hl_device *hdev)
 	hl_hw_queues_destroy(hdev);
 sw_fini:
 	hdev->asic_funcs->sw_fini(hdev);
+free_common_usr_intr_mem:
+	vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
 free_usr_intr_mem:
-	kfree(hdev->user_interrupt);
+	if (user_interrupt_cnt) {
+		for (i = 0 ; i < user_interrupt_cnt ; i++) {
+			if (!hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool)
+				break;
+			vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
+		}
+		kfree(hdev->user_interrupt);
+	}
 early_fini:
 	device_early_fini(hdev);
 out_disabled:
@@ -2323,6 +2362,7 @@ int hl_device_init(struct hl_device *hdev)
  */
 void hl_device_fini(struct hl_device *hdev)
 {
+	u32 user_interrupt_cnt;
 	bool device_in_reset;
 	ktime_t timeout;
 	u64 reset_sec;
@@ -2443,7 +2483,20 @@ void hl_device_fini(struct hl_device *hdev)
 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 		hl_cq_fini(hdev, &hdev->completion_queue[i]);
 	kfree(hdev->completion_queue);
-	kfree(hdev->user_interrupt);
+
+	user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
+					hdev->asic_prop.user_interrupt_count;
+
+	if (user_interrupt_cnt) {
+		if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
+			for (i = 0 ; i < user_interrupt_cnt ; i++)
+				vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
+		}
+
+		kfree(hdev->user_interrupt);
+	}
+
+	vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
 
 	hl_hw_queues_destroy(hdev);
 
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 4c5d55c9109d..1342686d0ce5 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -106,6 +106,8 @@ struct hl_fpriv;
 /* MMU */
 #define MMU_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
 
+#define TIMESTAMP_FREE_NODES_NUM	512
+
 /**
  * enum hl_mmu_page_table_location - mmu page table location
  * @MMU_DR_PGT: page-table is located on device DRAM.
@@ -1104,9 +1106,26 @@ enum hl_user_interrupt_type {
 	HL_USR_INTERRUPT_UNEXPECTED
 };
 
+/**
+ * struct hl_ts_free_jobs - holds user interrupt ts free nodes related data
+ * @free_nodes_pool: pool of nodes to be used for free timestamp jobs
+ * @free_nodes_length: number of nodes in free_nodes_pool
+ * @next_avail_free_node_idx: index of the next free node in the pool
+ *
+ * the free nodes pool must be protected by the user interrupt lock
+ * to avoid race between different interrupts which are using the same
+ * ts buffer with different offsets.
+ */
+struct hl_ts_free_jobs {
+	struct timestamp_reg_free_node *free_nodes_pool;
+	u32				free_nodes_length;
+	u32				next_avail_free_node_idx;
+};
+
 /**
  * struct hl_user_interrupt - holds user interrupt information
  * @hdev: pointer to the device structure
+ * @ts_free_jobs_data: timestamp free jobs related data
  * @type: user interrupt type
  * @wait_list_head: head to the list of user threads pending on this interrupt
  * @wait_list_lock: protects wait_list_head
@@ -1115,6 +1134,7 @@ enum hl_user_interrupt_type {
  */
 struct hl_user_interrupt {
 	struct hl_device		*hdev;
+	struct hl_ts_free_jobs		ts_free_jobs_data;
 	enum hl_user_interrupt_type	type;
 	struct list_head		wait_list_head;
 	spinlock_t			wait_list_lock;
@@ -1127,11 +1147,15 @@ struct hl_user_interrupt {
  * @free_objects_node: node in the list free_obj_jobs
  * @cq_cb: pointer to cq command buffer to be freed
  * @buf: pointer to timestamp buffer to be freed
+ * @in_use: indicates whether the node still in use in workqueue thread.
+ * @dynamic_alloc: indicates whether the node was allocated dynamically in the interrupt handler
  */
 struct timestamp_reg_free_node {
 	struct list_head	free_objects_node;
 	struct hl_cb		*cq_cb;
 	struct hl_mmap_mem_buf	*buf;
+	atomic_t		in_use;
+	u8			dynamic_alloc;
 };
 
 /* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
@@ -1140,11 +1164,14 @@ struct timestamp_reg_free_node {
  * @free_obj: workqueue object to free timestamp registration node objects
  * @hdev: pointer to the device structure
  * @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
+ * @dynamic_alloc_free_obj_head: list of free jobs nodes which were dynamically allocated in the
+ *                               interrupt handler.
  */
 struct timestamp_reg_work_obj {
 	struct work_struct	free_obj;
 	struct hl_device	*hdev;
 	struct list_head	*free_obj_head;
+	struct list_head	*dynamic_alloc_free_obj_head;
 };
 
 /* struct timestamp_reg_info - holds the timestamp registration related data.
diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c
index 058f27040805..0947d286a5ab 100644
--- a/drivers/accel/habanalabs/common/irq.c
+++ b/drivers/accel/habanalabs/common/irq.c
@@ -204,8 +204,10 @@ static void hl_ts_free_objects(struct work_struct *work)
 {
 	struct timestamp_reg_work_obj *job =
 			container_of(work, struct timestamp_reg_work_obj, free_obj);
+	struct list_head *dynamic_alloc_free_list_head = job->dynamic_alloc_free_obj_head;
 	struct timestamp_reg_free_node *free_obj, *temp_free_obj;
 	struct list_head *free_list_head = job->free_obj_head;
+
 	struct hl_device *hdev = job->hdev;
 
 	list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
@@ -215,10 +217,28 @@ static void hl_ts_free_objects(struct work_struct *work)
 
 		hl_mmap_mem_buf_put(free_obj->buf);
 		hl_cb_put(free_obj->cq_cb);
-		kfree(free_obj);
+		atomic_set(&free_obj->in_use, 0);
 	}
 
 	kfree(free_list_head);
+
+	if (dynamic_alloc_free_list_head) {
+		list_for_each_entry_safe(free_obj, temp_free_obj, dynamic_alloc_free_list_head,
+								free_objects_node) {
+			dev_dbg(hdev->dev,
+				"Dynamic_Alloc list: About to put refcount to buf (%p) cq_cb(%p)\n",
+						free_obj->buf,
+						free_obj->cq_cb);
+
+			hl_mmap_mem_buf_put(free_obj->buf);
+			hl_cb_put(free_obj->cq_cb);
+			list_del(&free_obj->free_objects_node);
+			kfree(free_obj);
+		}
+
+		kfree(dynamic_alloc_free_list_head);
+	}
+
 	kfree(job);
 }
 
@@ -233,12 +253,18 @@ static void hl_ts_free_objects(struct work_struct *work)
  * list to a dedicated workqueue to do the actual put.
  */
 static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
-						struct list_head **free_list, ktime_t now,
-						u32 interrupt_id)
+						struct list_head **free_list,
+						struct list_head **dynamic_alloc_list,
+						struct hl_user_interrupt *intr)
 {
+	struct hl_ts_free_jobs *ts_free_jobs_data;
 	struct timestamp_reg_free_node *free_node;
+	u32 free_node_index;
 	u64 timestamp;
 
+	ts_free_jobs_data = &intr->ts_free_jobs_data;
+	free_node_index = ts_free_jobs_data->next_avail_free_node_idx;
+
 	if (!(*free_list)) {
 		/* Alloc/Init the timestamp registration free objects list */
 		*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
@@ -248,16 +274,35 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
 		INIT_LIST_HEAD(*free_list);
 	}
 
-	free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
-	if (!free_node)
-		return -ENOMEM;
+	free_node = &ts_free_jobs_data->free_nodes_pool[free_node_index];
+	if (atomic_cmpxchg(&free_node->in_use, 0, 1)) {
+		dev_dbg(hdev->dev,
+			"Timestamp free node pool is full, buff: %p, record: %p, irq: %u\n",
+				pend->ts_reg_info.buf,
+				pend,
+				intr->interrupt_id);
 
-	timestamp = ktime_to_ns(now);
+		if (!(*dynamic_alloc_list)) {
+			*dynamic_alloc_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
+			if (!(*dynamic_alloc_list))
+				return -ENOMEM;
+
+			INIT_LIST_HEAD(*dynamic_alloc_list);
+		}
+
+		free_node = kmalloc(sizeof(struct timestamp_reg_free_node), GFP_ATOMIC);
+		if (!free_node)
+			return -ENOMEM;
+
+		free_node->dynamic_alloc = 1;
+	}
+
+	timestamp = ktime_to_ns(intr->timestamp);
 
 	*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
 
 	dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
-			pend, pend->ts_reg_info.timestamp_kernel_addr, interrupt_id);
+			pend, pend->ts_reg_info.timestamp_kernel_addr, intr->interrupt_id);
 
 	list_del(&pend->wait_list_node);
 
@@ -266,7 +311,14 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
 	 */
 	free_node->buf = pend->ts_reg_info.buf;
 	free_node->cq_cb = pend->ts_reg_info.cq_cb;
-	list_add(&free_node->free_objects_node, *free_list);
+
+	if (free_node->dynamic_alloc) {
+		list_add(&free_node->free_objects_node, *dynamic_alloc_list);
+	} else {
+		ts_free_jobs_data->next_avail_free_node_idx =
+				(++free_node_index) % ts_free_jobs_data->free_nodes_length;
+		list_add(&free_node->free_objects_node, *free_list);
+	}
 
 	/* Mark TS record as free */
 	pend->ts_reg_info.in_use = false;
@@ -276,8 +328,8 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
 
 static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interrupt *intr)
 {
+	struct list_head *ts_reg_free_list_head = NULL, *dynamic_alloc_list_head = NULL;
 	struct hl_user_pending_interrupt *pend, *temp_pend;
-	struct list_head *ts_reg_free_list_head = NULL;
 	struct timestamp_reg_work_obj *job;
 	bool reg_node_handle_fail = false;
 	int rc;
@@ -303,8 +355,8 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
 			if (pend->ts_reg_info.buf) {
 				if (!reg_node_handle_fail) {
 					rc = handle_registration_node(hdev, pend,
-							&ts_reg_free_list_head, intr->timestamp,
-							intr->interrupt_id);
+							&ts_reg_free_list_head,
+							&dynamic_alloc_list_head, intr);
 					if (rc)
 						reg_node_handle_fail = true;
 				}
@@ -320,6 +372,7 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
 	if (ts_reg_free_list_head) {
 		INIT_WORK(&job->free_obj, hl_ts_free_objects);
 		job->free_obj_head = ts_reg_free_list_head;
+		job->dynamic_alloc_free_obj_head = dynamic_alloc_list_head;
 		job->hdev = hdev;
 		queue_work(hdev->ts_free_obj_wq, &job->free_obj);
 	} else {

From ba24b5ec7847d4126fc37cabd4cef3579d9ca806 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Wed, 19 Jul 2023 16:56:53 +0300
Subject: [PATCH 53/77] accel/habanalabs: split user interrupts pending list

Currently driver maintain one list for both pending user interrupts
which seeks to wait till CQ reaches it's target value and also the ones
that seeks to get timestamp records when the CQ reaches it's target
value.
This causes delay in handling the waiters which gets higher priority
than the timestamp records.
In order to solve this, let's split the list into two,
one for each case and each one is protected by it's own spinlock.
Waiters will be handled within the interrupt context first,
then the timestamp records will be set.
Freeing the timestamp related memory will be handled in a workqueue.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 241 ++++++++++--------
 drivers/accel/habanalabs/common/habanalabs.h  |  12 +-
 drivers/accel/habanalabs/common/irq.c         |  89 ++++---
 drivers/accel/habanalabs/gaudi2/gaudi2.c      |  20 +-
 4 files changed, 212 insertions(+), 150 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 02049bd26356..751d2c7d3fb8 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -1098,19 +1098,22 @@ static void
 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 {
 	struct hl_user_pending_interrupt *pend, *temp;
+	unsigned long flags;
 
-	spin_lock(&interrupt->wait_list_lock);
-	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
-		if (pend->ts_reg_info.buf) {
-			list_del(&pend->wait_list_node);
-			hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
-			hl_cb_put(pend->ts_reg_info.cq_cb);
-		} else {
-			pend->fence.error = -EIO;
-			complete_all(&pend->fence.completion);
-		}
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
+		pend->fence.error = -EIO;
+		complete_all(&pend->fence.completion);
 	}
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
+	spin_lock_irqsave(&interrupt->ts_list_lock, flags);
+	list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
+		list_del(&pend->list_node);
+		hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
+		hl_cb_put(pend->ts_reg_info.cq_cb);
+	}
+	spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
 }
 
 void hl_release_pending_user_interrupts(struct hl_device *hdev)
@@ -3251,18 +3254,19 @@ static void unregister_timestamp_node(struct hl_device *hdev,
 {
 	struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
 	bool ts_rec_found = false;
+	unsigned long flags;
 
 	if (need_lock)
-		spin_lock(&interrupt->wait_list_lock);
+		spin_lock_irqsave(&interrupt->ts_list_lock, flags);
 
 	if (record->ts_reg_info.in_use) {
 		record->ts_reg_info.in_use = false;
-		list_del(&record->wait_list_node);
+		list_del(&record->list_node);
 		ts_rec_found = true;
 	}
 
 	if (need_lock)
-		spin_unlock(&interrupt->wait_list_lock);
+		spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
 
 	/* Put refcounts that were taken when we registered the event */
 	if (ts_rec_found) {
@@ -3272,7 +3276,7 @@ static void unregister_timestamp_node(struct hl_device *hdev,
 }
 
 static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
-					struct wait_interrupt_data *data,
+					struct wait_interrupt_data *data, unsigned long *flags,
 					struct hl_user_pending_interrupt **pend)
 {
 	struct hl_user_pending_interrupt *req_offset_record;
@@ -3302,13 +3306,13 @@ static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx
 				req_offset_record->ts_reg_info.interrupt->interrupt_id) {
 
 			need_lock = true;
-			spin_unlock(&data->interrupt->wait_list_lock);
+			spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
 		}
 
 		unregister_timestamp_node(hdev, req_offset_record, need_lock);
 
 		if (need_lock)
-			spin_lock(&data->interrupt->wait_list_lock);
+			spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
 	}
 
 	/* Fill up the new registration node info and add it to the list */
@@ -3325,13 +3329,89 @@ static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx
 	return rc;
 }
 
-static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 				struct wait_interrupt_data *data,
-				bool register_ts_record,
 				u32 *status, u64 *timestamp)
 {
 	struct hl_user_pending_interrupt *pend;
-	unsigned long timeout;
+	unsigned long flags;
+	int rc = 0;
+
+	hl_ctx_get(ctx);
+
+	data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
+	if (!data->cq_cb) {
+		rc = -EINVAL;
+		goto put_ctx;
+	}
+
+	/* Validate the cq offset */
+	if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
+			((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
+		rc = -EINVAL;
+		goto put_cq_cb;
+	}
+
+	dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
+					data->interrupt->interrupt_id, data->ts_handle,
+					data->ts_offset, data->cq_offset);
+
+	data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
+	if (!data->buf) {
+		rc = -EINVAL;
+		goto put_cq_cb;
+	}
+
+	spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
+
+	/* get ts buffer record */
+	rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
+	if (rc) {
+		spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+		goto put_ts_buff;
+	}
+
+	/* We check for completion value as interrupt could have been received
+	 * before we add the timestamp node to the ts list.
+	 */
+	if (*pend->cq_kernel_addr >= data->target_value) {
+		spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+
+		dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
+				pend, data->ts_offset, data->interrupt->interrupt_id);
+
+		pend->ts_reg_info.in_use = 0;
+		*status = HL_WAIT_CS_STATUS_COMPLETED;
+		*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
+
+		goto put_ts_buff;
+	}
+
+	list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
+	spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+
+	rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
+
+	hl_ctx_put(ctx);
+
+	return rc;
+
+put_ts_buff:
+	hl_mmap_mem_buf_put(data->buf);
+put_cq_cb:
+	hl_cb_put(data->cq_cb);
+put_ctx:
+	hl_ctx_put(ctx);
+
+	return rc;
+}
+
+static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+				struct wait_interrupt_data *data,
+				u32 *status, u64 *timestamp)
+{
+	struct hl_user_pending_interrupt *pend;
+	unsigned long timeout, flags;
 	long completion_rc;
 	int rc = 0;
 
@@ -3352,61 +3432,29 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 		goto put_cq_cb;
 	}
 
-	if (register_ts_record) {
-		dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
-				data->interrupt->interrupt_id, data->ts_handle,
-				data->ts_offset, data->cq_offset);
-
-		data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
-		if (!data->buf) {
-			rc = -EINVAL;
-			goto put_cq_cb;
-		}
-
-		spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
-
-		/* get ts buffer record */
-		rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &pend);
-		if (rc) {
-			spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
-			goto put_ts_buff;
-		}
-	} else {
-		pend = kzalloc(sizeof(*pend), GFP_KERNEL);
-		if (!pend) {
-			rc = -ENOMEM;
-			goto put_cq_cb;
-		}
-		hl_fence_init(&pend->fence, ULONG_MAX);
-		pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
-		pend->cq_target_value = data->target_value;
-		spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
+	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
+	if (!pend) {
+		rc = -ENOMEM;
+		goto put_cq_cb;
 	}
 
+	hl_fence_init(&pend->fence, ULONG_MAX);
+	pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
+	pend->cq_target_value = data->target_value;
+	spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
+
+
 	/* We check for completion value as interrupt could have been received
-	 * before we add the wait/timestamp node to the wait list.
+	 * before we add the wait node to the wait list.
 	 */
-	if (*pend->cq_kernel_addr >= data->target_value) {
-		spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
+	if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
+		spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
-		if (register_ts_record) {
-			dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
-					pend, data->ts_offset, data->interrupt->interrupt_id);
-			pend->ts_reg_info.in_use = false;
-		}
+		if (*pend->cq_kernel_addr >= data->target_value)
+			*status = HL_WAIT_CS_STATUS_COMPLETED;
+		else
+			*status = HL_WAIT_CS_STATUS_BUSY;
 
-		*status = HL_WAIT_CS_STATUS_COMPLETED;
-
-		if (register_ts_record) {
-			*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
-			goto put_ts_buff;
-		} else {
-			pend->fence.timestamp = ktime_get();
-			goto set_timestamp;
-		}
-	} else if (!data->intr_timeout_us) {
-		spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
-		*status = HL_WAIT_CS_STATUS_BUSY;
 		pend->fence.timestamp = ktime_get();
 		goto set_timestamp;
 	}
@@ -3417,13 +3465,8 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	 * in order to shorten the list pass loop, since
 	 * same list could have nodes for different cq counter handle.
 	 */
-	list_add_tail(&pend->wait_list_node, &data->interrupt->wait_list_head);
-	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
-
-	if (register_ts_record) {
-		rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
-		goto ts_registration_exit;
-	}
+	list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
+	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
 	/* Wait for interrupt handler to signal completion */
 	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
@@ -3462,21 +3505,18 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	 * for ts record, the node will be deleted in the irq handler after
 	 * we reach the target value.
 	 */
-	spin_lock_irqsave(&data->interrupt->wait_list_lock, data->flags);
-	list_del(&pend->wait_list_node);
-	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, data->flags);
+	spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
+	list_del(&pend->list_node);
+	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
 set_timestamp:
 	*timestamp = ktime_to_ns(pend->fence.timestamp);
 	kfree(pend);
 	hl_cb_put(data->cq_cb);
-ts_registration_exit:
 	hl_ctx_put(ctx);
 
 	return rc;
 
-put_ts_buff:
-	hl_mmap_mem_buf_put(data->buf);
 put_cq_cb:
 	hl_cb_put(data->cq_cb);
 put_ctx:
@@ -3513,7 +3553,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 	 * handler to monitor
 	 */
 	spin_lock(&interrupt->wait_list_lock);
-	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
+	list_add_tail(&pend->list_node, &interrupt->wait_list_head);
 	spin_unlock(&interrupt->wait_list_lock);
 
 	/* We check for completion value as interrupt could have been received
@@ -3590,7 +3630,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 
 remove_pending_user_interrupt:
 	spin_lock(&interrupt->wait_list_lock);
-	list_del(&pend->wait_list_node);
+	list_del(&pend->list_node);
 	spin_unlock(&interrupt->wait_list_lock);
 
 	*timestamp = ktime_to_ns(pend->fence.timestamp);
@@ -3649,16 +3689,6 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 		return -EINVAL;
 	}
 
-	/*
-	 * Allow only one registration at a time. this is needed in order to prevent issues
-	 * while handling the flow of re-use of the same offset.
-	 * Since the registration flow is protected only by the interrupt lock, re-use flow
-	 * might request to move ts node to another interrupt list, and in such case we're
-	 * not protected.
-	 */
-	if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT)
-		mutex_lock(&hpriv->ctx->ts_reg_lock);
-
 	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
 		struct wait_interrupt_data wait_intr_data = {0};
 
@@ -3671,9 +3701,23 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 		wait_intr_data.target_value = args->in.target;
 		wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
 
-		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
-				!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
-				&status, &timestamp);
+		if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
+			/*
+			 * Allow only one registration at a time. this is needed in order to prevent
+			 * issues while handling the flow of re-use of the same offset.
+			 * Since the registration flow is protected only by the interrupt lock,
+			 * re-use flow might request to move ts node to another interrupt list,
+			 * and in such case we're not protected.
+			 */
+			mutex_lock(&hpriv->ctx->ts_reg_lock);
+
+			rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
+						&status, &timestamp);
+
+			mutex_unlock(&hpriv->ctx->ts_reg_lock);
+		} else
+			rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
+						&status, &timestamp);
 	} else {
 		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
 				args->in.interrupt_timeout_us, args->in.addr,
@@ -3681,9 +3725,6 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 				&timestamp);
 	}
 
-	if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT)
-		mutex_unlock(&hpriv->ctx->ts_reg_lock);
-
 	if (rc)
 		return rc;
 
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 1342686d0ce5..7c2da8cfe844 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1128,7 +1128,9 @@ struct hl_ts_free_jobs {
  * @ts_free_jobs_data: timestamp free jobs related data
  * @type: user interrupt type
  * @wait_list_head: head to the list of user threads pending on this interrupt
+ * @ts_list_head: head to the list of timestamp records
  * @wait_list_lock: protects wait_list_head
+ * @ts_list_lock: protects ts_list_head
  * @timestamp: last timestamp taken upon interrupt
  * @interrupt_id: msix interrupt id
  */
@@ -1137,7 +1139,9 @@ struct hl_user_interrupt {
 	struct hl_ts_free_jobs		ts_free_jobs_data;
 	enum hl_user_interrupt_type	type;
 	struct list_head		wait_list_head;
+	struct list_head		ts_list_head;
 	spinlock_t			wait_list_lock;
+	spinlock_t			ts_list_lock;
 	ktime_t				timestamp;
 	u32				interrupt_id;
 };
@@ -1199,7 +1203,7 @@ struct timestamp_reg_info {
  * struct hl_user_pending_interrupt - holds a context to a user thread
  *                                    pending on an interrupt
  * @ts_reg_info: holds the timestamps registration nodes info
- * @wait_list_node: node in the list of user threads pending on an interrupt
+ * @list_node: node in the list of user threads pending on an interrupt or timestamp
  * @fence: hl fence object for interrupt completion
  * @cq_target_value: CQ target value
  * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt
@@ -1207,7 +1211,7 @@ struct timestamp_reg_info {
  */
 struct hl_user_pending_interrupt {
 	struct timestamp_reg_info	ts_reg_info;
-	struct list_head		wait_list_node;
+	struct list_head		list_node;
 	struct hl_fence			fence;
 	u64				cq_target_value;
 	u64				*cq_kernel_addr;
@@ -2742,6 +2746,8 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 	usr_intr.type = intr_type; \
 	INIT_LIST_HEAD(&usr_intr.wait_list_head); \
 	spin_lock_init(&usr_intr.wait_list_lock); \
+	INIT_LIST_HEAD(&usr_intr.ts_list_head); \
+	spin_lock_init(&usr_intr.ts_list_lock); \
 })
 
 struct hwmon_chip_info;
@@ -3712,7 +3718,7 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
 irqreturn_t hl_irq_handler_cq(int irq, void *arg);
 irqreturn_t hl_irq_handler_eq(int irq, void *arg);
 irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg);
-irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg);
+irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg);
 irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg);
 irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg);
 u32 hl_cq_inc_ptr(u32 ptr);
diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c
index 0947d286a5ab..978b7f4d5eeb 100644
--- a/drivers/accel/habanalabs/common/irq.c
+++ b/drivers/accel/habanalabs/common/irq.c
@@ -304,7 +304,7 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
 	dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
 			pend, pend->ts_reg_info.timestamp_kernel_addr, intr->interrupt_id);
 
-	list_del(&pend->wait_list_node);
+	list_del(&pend->list_node);
 
 	/* Putting the refcount for ts_buff and cq_cb objects will be handled
 	 * in workqueue context, just add job to free_list.
@@ -326,12 +326,13 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
 	return 0;
 }
 
-static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interrupt *intr)
+static void handle_user_interrupt_ts_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
 {
 	struct list_head *ts_reg_free_list_head = NULL, *dynamic_alloc_list_head = NULL;
 	struct hl_user_pending_interrupt *pend, *temp_pend;
 	struct timestamp_reg_work_obj *job;
 	bool reg_node_handle_fail = false;
+	unsigned long flags;
 	int rc;
 
 	/* For registration nodes:
@@ -340,34 +341,27 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
 	 * or in irq handler context at all (since release functions are long and
 	 * might sleep), so we will need to handle that part in workqueue context.
 	 * To avoid handling kmalloc failure which compels us rolling back actions
-	 * and move nodes hanged on the free list back to the interrupt wait list
+	 * and move nodes hanged on the free list back to the interrupt ts list
 	 * we always alloc the job of the WQ at the beginning.
 	 */
 	job = kmalloc(sizeof(*job), GFP_ATOMIC);
 	if (!job)
 		return;
 
-	spin_lock(&intr->wait_list_lock);
-
-	list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) {
+	spin_lock_irqsave(&intr->ts_list_lock, flags);
+	list_for_each_entry_safe(pend, temp_pend, &intr->ts_list_head, list_node) {
 		if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
 				!pend->cq_kernel_addr) {
-			if (pend->ts_reg_info.buf) {
-				if (!reg_node_handle_fail) {
-					rc = handle_registration_node(hdev, pend,
-							&ts_reg_free_list_head,
-							&dynamic_alloc_list_head, intr);
-					if (rc)
-						reg_node_handle_fail = true;
-				}
-			} else {
-				/* Handle wait target value node */
-				pend->fence.timestamp = intr->timestamp;
-				complete_all(&pend->fence.completion);
+			if (!reg_node_handle_fail) {
+				rc = handle_registration_node(hdev, pend,
+						&ts_reg_free_list_head,
+						&dynamic_alloc_list_head, intr);
+				if (rc)
+					reg_node_handle_fail = true;
 			}
 		}
 	}
-	spin_unlock(&intr->wait_list_lock);
+	spin_unlock_irqrestore(&intr->ts_list_lock, flags);
 
 	if (ts_reg_free_list_head) {
 		INIT_WORK(&job->free_obj, hl_ts_free_objects);
@@ -380,6 +374,23 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
 	}
 }
 
+static void handle_user_interrupt_wait_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
+{
+	struct hl_user_pending_interrupt *pend, *temp_pend;
+	unsigned long flags;
+
+	spin_lock_irqsave(&intr->wait_list_lock, flags);
+	list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, list_node) {
+		if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
+				!pend->cq_kernel_addr) {
+			/* Handle wait target value node */
+			pend->fence.timestamp = intr->timestamp;
+			complete_all(&pend->fence.completion);
+		}
+	}
+	spin_unlock_irqrestore(&intr->wait_list_lock, flags);
+}
+
 static void handle_tpc_interrupt(struct hl_device *hdev)
 {
 	u64 event_mask;
@@ -401,19 +412,38 @@ static void handle_unexpected_user_interrupt(struct hl_device *hdev)
 }
 
 /**
- * hl_irq_handler_user_interrupt - irq handler for user interrupts
+ * hl_irq_user_interrupt_handler - irq handler for user interrupts.
  *
  * @irq: irq number
  * @arg: pointer to user interrupt structure
- *
  */
-irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg)
+irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg)
 {
 	struct hl_user_interrupt *user_int = arg;
+	struct hl_device *hdev = user_int->hdev;
 
 	user_int->timestamp = ktime_get();
+	switch (user_int->type) {
+	case HL_USR_INTERRUPT_CQ:
+		/* First handle user waiters threads */
+		handle_user_interrupt_wait_list(hdev, &hdev->common_user_cq_interrupt);
+		handle_user_interrupt_wait_list(hdev, user_int);
 
-	return IRQ_WAKE_THREAD;
+		/* Second handle user timestamp registrations */
+		handle_user_interrupt_ts_list(hdev,  &hdev->common_user_cq_interrupt);
+		handle_user_interrupt_ts_list(hdev, user_int);
+		break;
+	case HL_USR_INTERRUPT_DECODER:
+		handle_user_interrupt_wait_list(hdev, &hdev->common_decoder_interrupt);
+
+		/* Handle decoder interrupt registered on this specific irq */
+		handle_user_interrupt_wait_list(hdev, user_int);
+		break;
+	default:
+		break;
+	}
+
+	return IRQ_HANDLED;
 }
 
 /**
@@ -429,19 +459,8 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
 	struct hl_user_interrupt *user_int = arg;
 	struct hl_device *hdev = user_int->hdev;
 
+	user_int->timestamp = ktime_get();
 	switch (user_int->type) {
-	case HL_USR_INTERRUPT_CQ:
-		handle_user_interrupt(hdev, &hdev->common_user_cq_interrupt);
-
-		/* Handle user cq interrupt registered on this specific irq */
-		handle_user_interrupt(hdev, user_int);
-		break;
-	case HL_USR_INTERRUPT_DECODER:
-		handle_user_interrupt(hdev, &hdev->common_decoder_interrupt);
-
-		/* Handle decoder interrupt registered on this specific irq */
-		handle_user_interrupt(hdev, user_int);
-		break;
 	case HL_USR_INTERRUPT_TPC:
 		handle_tpc_interrupt(hdev);
 		break;
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index b0ba62b691ec..867175431418 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -4227,9 +4227,7 @@ static int gaudi2_dec_enable_msix(struct hl_device *hdev)
 			rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
 						gaudi2_irq_name(i), (void *) dec);
 		} else {
-			rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
-					hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
-					gaudi2_irq_name(i),
+			rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
 					(void *) &hdev->user_interrupt[dec->core_id]);
 		}
 
@@ -4287,17 +4285,17 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
 	}
 
 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
-	rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
-			hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
-			gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
+	rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
+					gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT),
+					&hdev->tpc_interrupt);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
 		goto free_dec_irq;
 	}
 
 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
-	rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
-			gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
+	rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
+					gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
 					&hdev->unexpected_error_interrupt);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
@@ -4309,10 +4307,8 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
 			i++, j++, user_irq_init_cnt++) {
 
 		irq = pci_irq_vector(hdev->pdev, i);
-		rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
-						hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
-						gaudi2_irq_name(i), &hdev->user_interrupt[j]);
-
+		rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
+				&hdev->user_interrupt[j]);
 		if (rc) {
 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
 			goto free_user_irq;

From bb644f61970ad44147c9466b0cb99a028de02138 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 17 Aug 2023 21:47:49 +0300
Subject: [PATCH 54/77] accel/habanalabs: fix SG table creation for dma-buf
 mapping

In some cases the calculated number of required entries for the dma-buf
SG table is wrong. For example, if the page size is larger than both the
dma max segment size of the importer device and from the exported side,
or if the exported size is part of a phys_pg_pack that is composed of
several pages.
In these cases, redundant entries will be added to the SG table.

Modify the method that the number of entries is calculated, and the way
they are prepared.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/memory.c | 191 ++++++++++++-----------
 1 file changed, 100 insertions(+), 91 deletions(-)

diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index c09c066a0db4..f42cb04b578b 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -1535,21 +1535,17 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 						u64 page_size, u64 exported_size,
 						struct device *dev, enum dma_data_direction dir)
 {
-	u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages;
-	struct asic_fixed_properties *prop;
-	int rc, i, j, nents, cur_page;
+	u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page,
+		left_size_in_dma_seg, device_address, bar_address;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct scatterlist *sg;
+	unsigned int nents, i;
 	struct sg_table *sgt;
+	bool next_sg_entry;
+	int rc;
 
-	prop = &hdev->asic_prop;
-
-	dma_max_seg_size = dma_get_max_seg_size(dev);
-
-	/* We would like to align the max segment size to PAGE_SIZE, so the
-	 * SGL will contain aligned addresses that can be easily mapped to
-	 * an MMU
-	 */
-	dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
+	/* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */
+	dma_max_seg_size = ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
 	if (dma_max_seg_size < PAGE_SIZE) {
 		dev_err_ratelimited(hdev->dev,
 				"dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
@@ -1561,120 +1557,133 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 	if (!sgt)
 		return ERR_PTR(-ENOMEM);
 
-	cur_size_to_export = exported_size;
+	/* Calculate the required number of entries for the SG table */
+	curr_page = 0;
+	nents = 1;
+	left_size_to_export = exported_size;
+	left_size_in_page = page_size;
+	left_size_in_dma_seg = dma_max_seg_size;
+	next_sg_entry = false;
 
-	/* If the size of each page is larger than the dma max segment size,
-	 * then we can't combine pages and the number of entries in the SGL
-	 * will just be the
-	 * <number of pages> * <chunks of max segment size in each page>
-	 */
-	if (page_size > dma_max_seg_size) {
-		/* we should limit number of pages according to the exported size */
-		cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size);
-		nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size);
-	} else {
-		cur_npages = npages;
+	while (true) {
+		size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
+		left_size_to_export -= size;
+		left_size_in_page -= size;
+		left_size_in_dma_seg -= size;
 
-		/* Get number of non-contiguous chunks */
-		for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) {
-			if (pages[i - 1] + page_size != pages[i] ||
-					chunk_size + page_size > dma_max_seg_size) {
-				nents++;
-				chunk_size = page_size;
-				continue;
-			}
+		if (!left_size_to_export)
+			break;
 
-			chunk_size += page_size;
+		if (!left_size_in_page) {
+			/* left_size_to_export is not zero so there must be another page */
+			if (pages[curr_page] + page_size != pages[curr_page + 1])
+				next_sg_entry = true;
+
+			++curr_page;
+			left_size_in_page = page_size;
+		}
+
+		if (!left_size_in_dma_seg) {
+			next_sg_entry = true;
+			left_size_in_dma_seg = dma_max_seg_size;
+		}
+
+		if (next_sg_entry) {
+			++nents;
+			next_sg_entry = false;
 		}
 	}
 
 	rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
 	if (rc)
-		goto error_free;
+		goto err_free_sgt;
 
-	cur_page = 0;
+	/* Prepare the SG table entries */
+	curr_page = 0;
+	device_address = pages[curr_page];
+	left_size_to_export = exported_size;
+	left_size_in_page = page_size;
+	left_size_in_dma_seg = dma_max_seg_size;
+	next_sg_entry = false;
 
-	if (page_size > dma_max_seg_size) {
-		u64 size_left, cur_device_address = 0;
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address);
+		chunk_size = 0;
 
-		size_left = page_size;
+		for ( ; curr_page < npages ; ++curr_page) {
+			size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
+			chunk_size += size;
+			left_size_to_export -= size;
+			left_size_in_page -= size;
+			left_size_in_dma_seg -= size;
 
-		/* Need to split each page into the number of chunks of
-		 * dma_max_seg_size
-		 */
-		for_each_sgtable_dma_sg(sgt, sg, i) {
-			if (size_left == page_size)
-				cur_device_address =
-					pages[cur_page] - prop->dram_base_address;
-			else
-				cur_device_address += dma_max_seg_size;
+			if (!left_size_to_export)
+				break;
 
-			/* make sure not to export over exported size */
-			chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export);
+			if (!left_size_in_page) {
+				/* left_size_to_export is not zero so there must be another page */
+				if (pages[curr_page] + page_size != pages[curr_page + 1]) {
+					device_address = pages[curr_page + 1];
+					next_sg_entry = true;
+				}
 
-			bar_address = hdev->dram_pci_bar_start + cur_device_address;
-
-			rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
-			if (rc)
-				goto error_unmap;
-
-			cur_size_to_export -= chunk_size;
-
-			if (size_left > dma_max_seg_size) {
-				size_left -= dma_max_seg_size;
-			} else {
-				cur_page++;
-				size_left = page_size;
-			}
-		}
-	} else {
-		/* Merge pages and put them into the scatterlist */
-		for_each_sgtable_dma_sg(sgt, sg, i) {
-			chunk_size = page_size;
-			for (j = cur_page + 1 ; j < cur_npages ; j++) {
-				if (pages[j - 1] + page_size != pages[j] ||
-						chunk_size + page_size > dma_max_seg_size)
-					break;
-
-				chunk_size += page_size;
+				left_size_in_page = page_size;
 			}
 
-			bar_address = hdev->dram_pci_bar_start +
-					(pages[cur_page] - prop->dram_base_address);
+			if (!left_size_in_dma_seg) {
+				/*
+				 * Skip setting a new device address if already moving to a page
+				 * which is not contiguous with the current page.
+				 */
+				if (!next_sg_entry) {
+					device_address += chunk_size;
+					next_sg_entry = true;
+				}
 
-			/* make sure not to export over exported size */
-			chunk_size = min(chunk_size, cur_size_to_export);
-			rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
-			if (rc)
-				goto error_unmap;
+				left_size_in_dma_seg = dma_max_seg_size;
+			}
 
-			cur_size_to_export -= chunk_size;
-			cur_page = j;
+			if (next_sg_entry) {
+				next_sg_entry = false;
+				break;
+			}
 		}
+
+		rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
+		if (rc)
+			goto err_unmap;
 	}
 
-	/* Because we are not going to include a CPU list we want to have some
-	 * chance that other users will detect this by setting the orig_nents
-	 * to 0 and using only nents (length of DMA list) when going over the
-	 * sgl
+	/* There should be nothing left to export exactly after looping over all SG elements */
+	if (left_size_to_export) {
+		dev_err(hdev->dev,
+			"left size to export %#llx after initializing %u SG elements\n",
+			left_size_to_export, sgt->nents);
+		rc = -ENOMEM;
+		goto err_unmap;
+	}
+
+	/*
+	 * Because we are not going to include a CPU list, we want to have some chance that other
+	 * users will detect this when going over SG table, by setting the orig_nents to 0 and using
+	 * only nents (length of DMA list).
 	 */
 	sgt->orig_nents = 0;
 
 	return sgt;
 
-error_unmap:
+err_unmap:
 	for_each_sgtable_dma_sg(sgt, sg, i) {
 		if (!sg_dma_len(sg))
 			continue;
 
-		dma_unmap_resource(dev, sg_dma_address(sg),
-					sg_dma_len(sg), dir,
+		dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), dir,
 					DMA_ATTR_SKIP_CPU_SYNC);
 	}
 
 	sg_free_table(sgt);
 
-error_free:
+err_free_sgt:
 	kfree(sgt);
 	return ERR_PTR(rc);
 }

From 878ebc14db049ae85225756b187fec81df8108b3 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Sun, 20 Aug 2023 14:17:06 +0300
Subject: [PATCH 55/77] accel/habanalabs: set hl_dmabuf_priv.device_address
 only when needed

The device_address member of 'struct hl_dmabuf_priv' is used only when
virtual device memory is not supported and dma-buf is exported from
address.
Set the value of this field only when it is relevant, and add "phys" to
its name so it would be clearer that it can't be a device virtual
address.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/habanalabs.h | 8 ++++----
 drivers/accel/habanalabs/common/memory.c     | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 7c2da8cfe844..43e682cdeb75 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1811,16 +1811,16 @@ struct hl_cs_counters_atomic {
  * @phys_pg_pack: pointer to physical page pack if the dma-buf was exported
  *                where virtual memory is supported.
  * @memhash_hnode: pointer to the memhash node. this object holds the export count.
- * @device_address: physical address of the device's memory. Relevant only
- *                  if phys_pg_pack is NULL (dma-buf was exported from address).
- *                  The total size can be taken from the dmabuf object.
+ * device_phys_addr: physical address of the device's memory. Relevant only
+ *                   if phys_pg_pack is NULL (dma-buf was exported from address).
+ *                   The total size can be taken from the dmabuf object.
  */
 struct hl_dmabuf_priv {
 	struct dma_buf			*dmabuf;
 	struct hl_ctx			*ctx;
 	struct hl_vm_phys_pg_pack	*phys_pg_pack;
 	struct hl_vm_hash_node		*memhash_hnode;
-	uint64_t			device_address;
+	u64				device_phys_addr;
 };
 
 #define HL_CS_OUTCOME_HISTORY_LEN 256
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index f42cb04b578b..fe896edc18ac 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -1731,7 +1731,7 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 		npages = phys_pg_pack->npages;
 		page_size = phys_pg_pack->page_size;
 	} else {
-		pages = &hl_dmabuf->device_address;
+		pages = &hl_dmabuf->device_phys_addr;
 		npages = 1;
 		page_size = hl_dmabuf->dmabuf->size;
 	}
@@ -2064,9 +2064,9 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 		rc = validate_export_params_no_mmu(hdev, export_addr, size);
 		if (rc)
 			goto err_free_dmabuf_wrapper;
-	}
 
-	hl_dmabuf->device_address = export_addr;
+		hl_dmabuf->device_phys_addr = export_addr;
+	}
 
 	rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd);
 	if (rc)

From d16945f60274d5826a1c7c6581dc5878247a5951 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Sun, 20 Aug 2023 15:33:43 +0300
Subject: [PATCH 56/77] accel/habanalabs: add missing offset handling for
 dma-buf

On devices with virtual device memory (Gaudi2 onwards), user can provide
an offset within an allocated device memory from which he wants to
export a dma-buf object.
The offset value is verified by driver, but it is not taken into
consideration when the importer driver maps the dma-buf and the SG table
it prepared.
Add the missing offset handling.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/habanalabs.h |  3 +
 drivers/accel/habanalabs/common/memory.c     | 75 +++++++++++---------
 2 files changed, 43 insertions(+), 35 deletions(-)

diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 43e682cdeb75..874ae76cbd78 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1811,6 +1811,8 @@ struct hl_cs_counters_atomic {
  * @phys_pg_pack: pointer to physical page pack if the dma-buf was exported
  *                where virtual memory is supported.
  * @memhash_hnode: pointer to the memhash node. this object holds the export count.
+ * @offset: the offset into the buffer from which the memory is exported.
+ *          Relevant only if virtual memory is supported and phys_pg_pack is being used.
  * device_phys_addr: physical address of the device's memory. Relevant only
  *                   if phys_pg_pack is NULL (dma-buf was exported from address).
  *                   The total size can be taken from the dmabuf object.
@@ -1820,6 +1822,7 @@ struct hl_dmabuf_priv {
 	struct hl_ctx			*ctx;
 	struct hl_vm_phys_pg_pack	*phys_pg_pack;
 	struct hl_vm_hash_node		*memhash_hnode;
+	u64				offset;
 	u64				device_phys_addr;
 };
 
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index fe896edc18ac..7ddfaf17d15a 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -1532,11 +1532,11 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
 }
 
 static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
-						u64 page_size, u64 exported_size,
+						u64 page_size, u64 exported_size, u64 offset,
 						struct device *dev, enum dma_data_direction dir)
 {
 	u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page,
-		left_size_in_dma_seg, device_address, bar_address;
+		left_size_in_dma_seg, device_address, bar_address, start_page;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct scatterlist *sg;
 	unsigned int nents, i;
@@ -1557,11 +1557,20 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 	if (!sgt)
 		return ERR_PTR(-ENOMEM);
 
+	/* Use the offset to move to the actual first page that is exported */
+	for (start_page = 0 ; start_page < npages ; ++start_page) {
+		if (offset < page_size)
+			break;
+
+		/* The offset value was validated so there can't be an underflow */
+		offset -= page_size;
+	}
+
 	/* Calculate the required number of entries for the SG table */
-	curr_page = 0;
+	curr_page = start_page;
 	nents = 1;
 	left_size_to_export = exported_size;
-	left_size_in_page = page_size;
+	left_size_in_page = page_size - offset;
 	left_size_in_dma_seg = dma_max_seg_size;
 	next_sg_entry = false;
 
@@ -1599,10 +1608,10 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 		goto err_free_sgt;
 
 	/* Prepare the SG table entries */
-	curr_page = 0;
-	device_address = pages[curr_page];
+	curr_page = start_page;
+	device_address = pages[curr_page] + offset;
 	left_size_to_export = exported_size;
-	left_size_in_page = page_size;
+	left_size_in_page = page_size - offset;
 	left_size_in_dma_seg = dma_max_seg_size;
 	next_sg_entry = false;
 
@@ -1708,7 +1717,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf,
 static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 					enum dma_data_direction dir)
 {
-	u64 *pages, npages, page_size, exported_size;
+	u64 *pages, npages, page_size, exported_size, offset;
 	struct dma_buf *dma_buf = attachment->dmabuf;
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct hl_dmabuf_priv *hl_dmabuf;
@@ -1724,6 +1733,7 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 	}
 
 	exported_size = hl_dmabuf->dmabuf->size;
+	offset = hl_dmabuf->offset;
 	phys_pg_pack = hl_dmabuf->phys_pg_pack;
 
 	if (phys_pg_pack) {
@@ -1736,7 +1746,7 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 		page_size = hl_dmabuf->dmabuf->size;
 	}
 
-	sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size,
+	sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size, offset,
 						attachment->dev, dir);
 	if (IS_ERR(sgt))
 		dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
@@ -1882,12 +1892,12 @@ static int export_dmabuf(struct hl_ctx *ctx,
 	return rc;
 }
 
-static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size)
+static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset)
 {
-	if (!PAGE_ALIGNED(device_addr)) {
+	if (!PAGE_ALIGNED(addr)) {
 		dev_dbg(hdev->dev,
 			"exported device memory address 0x%llx should be aligned to PAGE_SIZE 0x%lx\n",
-			device_addr, PAGE_SIZE);
+			addr, PAGE_SIZE);
 		return -EINVAL;
 	}
 
@@ -1898,6 +1908,13 @@ static int validate_export_params_common(struct hl_device *hdev, u64 device_addr
 		return -EINVAL;
 	}
 
+	if (!PAGE_ALIGNED(offset)) {
+		dev_dbg(hdev->dev,
+			"exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n",
+			offset, PAGE_SIZE);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -1907,13 +1924,13 @@ static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr
 	u64 bar_address;
 	int rc;
 
-	rc = validate_export_params_common(hdev, device_addr, size);
+	rc = validate_export_params_common(hdev, device_addr, size, 0);
 	if (rc)
 		return rc;
 
 	if (device_addr < prop->dram_user_base_address ||
-				(device_addr + size) > prop->dram_end_address ||
-				(device_addr + size) < device_addr) {
+			(device_addr + size) > prop->dram_end_address ||
+			(device_addr + size) < device_addr) {
 		dev_dbg(hdev->dev,
 			"DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
 			device_addr, size);
@@ -1940,36 +1957,26 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s
 	u64 bar_address;
 	int i, rc;
 
-	rc = validate_export_params_common(hdev, device_addr, size);
+	rc = validate_export_params_common(hdev, device_addr, size, offset);
 	if (rc)
 		return rc;
 
-	if (!PAGE_ALIGNED(offset)) {
-		dev_dbg(hdev->dev,
-			"exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n",
-			offset, PAGE_SIZE);
-		return -EINVAL;
-	}
-
 	if ((offset + size) > phys_pg_pack->total_size) {
 		dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n",
-				offset, size, phys_pg_pack->total_size);
+			offset, size, phys_pg_pack->total_size);
 		return -EINVAL;
 	}
 
 	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
-
 		bar_address = hdev->dram_pci_bar_start +
-					(phys_pg_pack->pages[i] - prop->dram_base_address);
+				(phys_pg_pack->pages[i] - prop->dram_base_address);
 
 		if ((bar_address + phys_pg_pack->page_size) >
 				(hdev->dram_pci_bar_start + prop->dram_pci_bar_size) ||
 				(bar_address + phys_pg_pack->page_size) < bar_address) {
 			dev_dbg(hdev->dev,
 				"DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
-					phys_pg_pack->pages[i],
-					phys_pg_pack->page_size);
-
+				phys_pg_pack->pages[i], phys_pg_pack->page_size);
 			return -EINVAL;
 		}
 	}
@@ -2025,7 +2032,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 	struct asic_fixed_properties *prop;
 	struct hl_dmabuf_priv *hl_dmabuf;
 	struct hl_device *hdev;
-	u64 export_addr;
 	int rc;
 
 	hdev = ctx->hdev;
@@ -2037,8 +2043,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 		return -EINVAL;
 	}
 
-	export_addr = addr + offset;
-
 	hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
 	if (!hl_dmabuf)
 		return -ENOMEM;
@@ -2054,18 +2058,19 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 			rc = PTR_ERR(phys_pg_pack);
 			goto dec_memhash_export_cnt;
 		}
-		rc = validate_export_params(hdev, export_addr, size, offset, phys_pg_pack);
+		rc = validate_export_params(hdev, addr, size, offset, phys_pg_pack);
 		if (rc)
 			goto dec_memhash_export_cnt;
 
 		hl_dmabuf->phys_pg_pack = phys_pg_pack;
 		hl_dmabuf->memhash_hnode = hnode;
+		hl_dmabuf->offset = offset;
 	} else {
-		rc = validate_export_params_no_mmu(hdev, export_addr, size);
+		rc = validate_export_params_no_mmu(hdev, addr, size);
 		if (rc)
 			goto err_free_dmabuf_wrapper;
 
-		hl_dmabuf->device_phys_addr = export_addr;
+		hl_dmabuf->device_phys_addr = addr;
 	}
 
 	rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd);

From 3824be1f4d4282190e82a9d5113f4079cb0796e9 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 21 Aug 2023 02:50:54 +0300
Subject: [PATCH 57/77] accel/habanalabs: add debug prints to dump content of
 SG table for dma-buf

Add debug prints to dump the content of the SG table which is prepared
when the dma-buf map op is called.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/memory.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 7ddfaf17d15a..c465dc828b0c 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -1679,6 +1679,13 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 	 */
 	sgt->orig_nents = 0;
 
+	dev_dbg(hdev->dev, "prepared SG table with %u entries for importer %s\n",
+		nents, dev_name(dev));
+	for_each_sgtable_dma_sg(sgt, sg, i)
+		dev_dbg(hdev->dev,
+			"SG entry %d: address %#llx, length %#x\n",
+			i, sg_dma_address(sg), sg_dma_len(sg));
+
 	return sgt;
 
 err_unmap:

From 9dca13141332e69fd657873194e77a1960fc9ab2 Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dhirschfeld@habana.ai>
Date: Thu, 7 Sep 2023 14:43:01 +0300
Subject: [PATCH 58/77] accel/habanalabs: add fw status SHUTDOWN_PREP

update hl_boot_if.h from specs to include
CPU_BOOT_STATUS_FW_SHUTDOWN_PREP

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/linux/habanalabs/hl_boot_if.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h
index 7de8a5786a36..93366d5621fd 100644
--- a/include/linux/habanalabs/hl_boot_if.h
+++ b/include/linux/habanalabs/hl_boot_if.h
@@ -394,6 +394,8 @@ enum cpu_boot_status {
 	CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16,
 	/* Internal Security has been initialized, device can be accessed */
 	CPU_BOOT_STATUS_SECURITY_READY = 17,
+	/* FW component is preparing to shutdown and communication with host is not available */
+	CPU_BOOT_STATUS_FW_SHUTDOWN_PREP = 18,
 };
 
 enum kmd_msg {

From 674f77798e432509e60919e2909fe6c72f5f2267 Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dhirschfeld@habana.ai>
Date: Thu, 7 Sep 2023 07:44:23 +0300
Subject: [PATCH 59/77] accel/habanalabs: extend preboot timeout when preboot
 might take longer

There are cases such when FW runs MBIST, that preboot is expected to take
longer than the usual. In such cases the firmware reports status
SECURITY_READY/IN_PREBOOT and we extend the timeout waiting for it.
This is currently implemented for Gaudi2 only.

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/firmware_if.c | 26 ++++++++++++++++---
 drivers/accel/habanalabs/common/habanalabs.h  |  3 +++
 drivers/accel/habanalabs/gaudi2/gaudi2.c      |  2 ++
 drivers/accel/habanalabs/gaudi2/gaudi2P.h     |  1 +
 4 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 2a6dfea3d27d..0fb360cca9cc 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -1474,8 +1474,9 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 int hl_fw_wait_preboot_ready(struct hl_device *hdev)
 {
 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
-	u32 status;
-	int rc;
+	u32 status = 0, timeout;
+	int rc, tries = 1;
+	bool preboot_still_runs;
 
 	/* Need to check two possible scenarios:
 	 *
@@ -1485,6 +1486,8 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
 	 * All other status values - for older firmwares where the uboot was
 	 * loaded from the FLASH
 	 */
+	timeout = pre_fw_load->wait_for_preboot_timeout;
+retry:
 	rc = hl_poll_timeout(
 		hdev,
 		pre_fw_load->cpu_boot_status_reg,
@@ -1493,7 +1496,24 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
 		(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
 		(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
 		hdev->fw_poll_interval_usec,
-		pre_fw_load->wait_for_preboot_timeout);
+		timeout);
+	/*
+	 * if F/W reports "security-ready" it means preboot might take longer.
+	 * If the field 'wait_for_preboot_extended_timeout' is non 0 we wait again
+	 * with that timeout
+	 */
+	preboot_still_runs = (status == CPU_BOOT_STATUS_SECURITY_READY ||
+				status == CPU_BOOT_STATUS_IN_PREBOOT ||
+				status == CPU_BOOT_STATUS_FW_SHUTDOWN_PREP ||
+				status == CPU_BOOT_STATUS_DRAM_RDY);
+
+	if (rc && tries && preboot_still_runs) {
+		tries--;
+		if (pre_fw_load->wait_for_preboot_extended_timeout) {
+			timeout = pre_fw_load->wait_for_preboot_extended_timeout;
+			goto retry;
+		}
+	}
 
 	if (rc) {
 		detect_cpu_boot_status(hdev, status);
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 874ae76cbd78..3c224942a758 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1410,6 +1410,8 @@ struct dynamic_fw_load_mgr {
  * @boot_err0_reg: boot_err0 register address
  * @boot_err1_reg: boot_err1 register address
  * @wait_for_preboot_timeout: timeout to poll for preboot ready
+ * @wait_for_preboot_extended_timeout: timeout to pull for preboot ready in case where we know
+ *		preboot needs longer time.
  */
 struct pre_fw_load_props {
 	u32 cpu_boot_status_reg;
@@ -1418,6 +1420,7 @@ struct pre_fw_load_props {
 	u32 boot_err0_reg;
 	u32 boot_err1_reg;
 	u32 wait_for_preboot_timeout;
+	u32 wait_for_preboot_extended_timeout;
 };
 
 /**
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 867175431418..35db02e5010b 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -4825,6 +4825,8 @@ static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
+	pre_fw_load->wait_for_preboot_extended_timeout =
+		GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC;
 }
 
 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
index 14e281fd9895..9b9eef0d97d6 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
@@ -84,6 +84,7 @@
 #define CORESIGHT_TIMEOUT_USEC			100000		/* 100 ms */
 
 #define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC		25000000	/* 25s */
+#define GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC 85000000	/* 85s */
 
 #define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC	10000000	/* 10s */
 

From de8773fdc500cfeb78064e5b50540b1fbf10448e Mon Sep 17 00:00:00 2001
From: Ariel Suller <asuller@habana.ai>
Date: Mon, 11 Sep 2023 17:43:07 +0300
Subject: [PATCH 60/77] accel/habanalabs: update boot status print

FW shutdown preparation status was added to spec.

Signed-off-by: Ariel Suller <asuller@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/firmware_if.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 0fb360cca9cc..47e8384134aa 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -1464,6 +1464,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 		dev_err(hdev->dev,
 			"Device boot progress - Stuck in preboot after security initialization\n");
 		break;
+	case CPU_BOOT_STATUS_FW_SHUTDOWN_PREP:
+		dev_err(hdev->dev,
+			"Device boot progress - Stuck in preparation for shutdown\n");
+		break;
 	default:
 		dev_err(hdev->dev,
 			"Device boot progress - Invalid or unexpected status code %d\n", status);

From d7aa294805a865a9b3e71e38b46e48df41170554 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Tue, 19 Sep 2023 13:26:02 +0300
Subject: [PATCH 61/77] accel/habanalabs: remove unused asic functions

asic_dma_{un}map_single() asic-specific functions are no longer called
from the common code, so delete these functions.

In addition, delete the gaudi2 implementation as they are also not
called.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 drivers/accel/habanalabs/common/habanalabs.h |  8 --------
 drivers/accel/habanalabs/gaudi2/gaudi2.c     | 20 --------------------
 2 files changed, 28 deletions(-)

diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 3c224942a758..4f667ac46301 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1520,8 +1520,6 @@ struct engines_data {
  * @asic_dma_pool_free: free small DMA allocation from pool.
  * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
  * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
- * @asic_dma_unmap_single: unmap a single DMA buffer
- * @asic_dma_map_single: map a single buffer to a DMA
  * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table.
  * @cs_parser: parse Command Submission.
  * @asic_dma_map_sgtable: DMA map scatter-gather table.
@@ -1645,12 +1643,6 @@ struct hl_asic_funcs {
 				size_t size, dma_addr_t *dma_handle);
 	void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
 				size_t size, void *vaddr);
-	void (*asic_dma_unmap_single)(struct hl_device *hdev,
-				dma_addr_t dma_addr, int len,
-				enum dma_data_direction dir);
-	dma_addr_t (*asic_dma_map_single)(struct hl_device *hdev,
-				void *addr, int len,
-				enum dma_data_direction dir);
 	void (*hl_dma_unmap_sgtable)(struct hl_device *hdev,
 				struct sg_table *sgt,
 				enum dma_data_direction dir);
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 35db02e5010b..be4a745f8548 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -6731,24 +6731,6 @@ static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t s
 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
 }
 
-static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
-					enum dma_data_direction dir)
-{
-	dma_addr_t dma_addr;
-
-	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
-	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
-		return 0;
-
-	return dma_addr;
-}
-
-static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
-					enum dma_data_direction dir)
-{
-	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
-}
-
 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
 {
 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
@@ -11515,8 +11497,6 @@ static const struct hl_asic_funcs gaudi2_funcs = {
 	.asic_dma_pool_free = gaudi2_dma_pool_free,
 	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
 	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
-	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
-	.asic_dma_map_single = gaudi2_dma_map_single,
 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
 	.cs_parser = gaudi2_cs_parser,
 	.asic_dma_map_sgtable = hl_dma_map_sgtable,

From 309ed969032c99e14ffabbc29ecf51e659e0ee4b Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Sun, 27 Aug 2023 10:54:06 +0300
Subject: [PATCH 62/77] accel/habanalabs: add traces for dma mappings

In order to get a full picture of DMA mappings (e.g. to track DMAR
errors), DMA mappings APIs should be covered.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/trace/events/habanalabs.h | 43 +++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/include/trace/events/habanalabs.h b/include/trace/events/habanalabs.h
index 951643e6a7a9..0d3667399d01 100644
--- a/include/trace/events/habanalabs.h
+++ b/include/trace/events/habanalabs.h
@@ -87,6 +87,49 @@ DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_free,
 	TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),
 	TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
 
+DECLARE_EVENT_CLASS(habanalabs_dma_map_template,
+	TP_PROTO(struct device *dev, u64 phys_addr, u64 dma_addr, size_t len,
+			enum dma_data_direction dir, const char *caller),
+
+	TP_ARGS(dev, phys_addr, dma_addr, len, dir, caller),
+
+	TP_STRUCT__entry(
+		__string(dname, dev_name(dev))
+		__field(u64, phys_addr)
+		__field(u64, dma_addr)
+		__field(u32, len)
+		__field(int, dir)
+		__field(const char *, caller)
+	),
+
+	TP_fast_assign(
+		__assign_str(dname, dev_name(dev));
+		__entry->phys_addr = phys_addr;
+		__entry->dma_addr = dma_addr;
+		__entry->len = len;
+		__entry->dir = dir;
+		__entry->caller = caller;
+	),
+
+	TP_printk("%s: phys_addr: %#llx, dma_addr: %#llx, len: %#x, dir: %d, caller: %s",
+		__get_str(dname),
+		__entry->phys_addr,
+		__entry->dma_addr,
+		__entry->len,
+		__entry->dir,
+		__entry->caller)
+);
+
+DEFINE_EVENT(habanalabs_dma_map_template, habanalabs_dma_map_page,
+	TP_PROTO(struct device *dev, u64 phys_addr, u64 dma_addr, size_t len,
+			enum dma_data_direction dir, const char *caller),
+	TP_ARGS(dev, phys_addr, dma_addr, len, dir, caller));
+
+DEFINE_EVENT(habanalabs_dma_map_template, habanalabs_dma_unmap_page,
+	TP_PROTO(struct device *dev, u64 phys_addr, u64 dma_addr, size_t len,
+			enum dma_data_direction dir, const char *caller),
+	TP_ARGS(dev, phys_addr, dma_addr, len, dir, caller));
+
 DECLARE_EVENT_CLASS(habanalabs_comms_template,
 	TP_PROTO(struct device *dev, char *op_str),
 

From ff92d010526749ecdcfc2e01bd2f2c8411405f53 Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Sun, 27 Aug 2023 15:12:52 +0300
Subject: [PATCH 63/77] accel/habanalabs: trace dma map sgtable

Traces the DMA [un]map_sgtable using the new traces we added.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c     | 56 +++++++++++++++++++-
 drivers/accel/habanalabs/common/habanalabs.h | 25 ++++++---
 drivers/accel/habanalabs/common/memory.c     |  4 +-
 drivers/accel/habanalabs/gaudi/gaudi.c       |  6 +--
 drivers/accel/habanalabs/gaudi2/gaudi2.c     |  4 +-
 drivers/accel/habanalabs/goya/goya.c         |  6 +--
 6 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 8cca4050d276..b1d330cd2537 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -188,7 +188,36 @@ void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *
 	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr);
 }
 
-int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
+int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
+				enum dma_data_direction dir, const char *caller)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct scatterlist *sg;
+	int rc, i;
+
+	rc = hdev->asic_funcs->dma_map_sgtable(hdev, sgt, dir);
+	if (rc)
+		return rc;
+
+	if (!trace_habanalabs_dma_map_page_enabled())
+		return 0;
+
+	for_each_sgtable_dma_sg(sgt, sg, i)
+		trace_habanalabs_dma_map_page(hdev->dev,
+				page_to_phys(sg_page(sg)),
+				sg->dma_address - prop->device_dma_offset_for_host_access,
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+				sg->dma_length,
+#else
+				sg->length,
+#endif
+				dir, caller);
+
+	return 0;
+}
+
+int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt,
+				enum dma_data_direction dir)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct scatterlist *sg;
@@ -206,7 +235,30 @@ int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_da
 	return 0;
 }
 
-void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
+void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
+					enum dma_data_direction dir, const char *caller)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct scatterlist *sg;
+	int i;
+
+	hdev->asic_funcs->dma_unmap_sgtable(hdev, sgt, dir);
+
+	if (trace_habanalabs_dma_unmap_page_enabled()) {
+		for_each_sgtable_dma_sg(sgt, sg, i)
+			trace_habanalabs_dma_unmap_page(hdev->dev, page_to_phys(sg_page(sg)),
+					sg->dma_address - prop->device_dma_offset_for_host_access,
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+					sg->dma_length,
+#else
+					sg->length,
+#endif
+					dir, caller);
+	}
+}
+
+void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
+				enum dma_data_direction dir)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct scatterlist *sg;
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 4f667ac46301..b1b1c51aa5b1 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -159,6 +159,11 @@ enum hl_mmu_page_table_location {
 #define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \
 	hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__)
 
+#define hl_dma_map_sgtable(hdev, sgt, dir) \
+	hl_dma_map_sgtable_caller(hdev, sgt, dir, __func__)
+#define hl_dma_unmap_sgtable(hdev, sgt, dir) \
+	hl_dma_unmap_sgtable_caller(hdev, sgt, dir, __func__)
+
 /*
  * Reset Flags
  *
@@ -1520,9 +1525,9 @@ struct engines_data {
  * @asic_dma_pool_free: free small DMA allocation from pool.
  * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
  * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
- * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table.
+ * @dma_unmap_sgtable: DMA unmap scatter-gather table.
+ * @dma_map_sgtable: DMA map scatter-gather table.
  * @cs_parser: parse Command Submission.
- * @asic_dma_map_sgtable: DMA map scatter-gather table.
  * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
  * @update_eq_ci: update event queue CI.
  * @context_switch: called upon ASID context switch.
@@ -1643,12 +1648,11 @@ struct hl_asic_funcs {
 				size_t size, dma_addr_t *dma_handle);
 	void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
 				size_t size, void *vaddr);
-	void (*hl_dma_unmap_sgtable)(struct hl_device *hdev,
-				struct sg_table *sgt,
+	void (*dma_unmap_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
+				enum dma_data_direction dir);
+	int (*dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
 				enum dma_data_direction dir);
 	int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
-	int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
-				enum dma_data_direction dir);
 	void (*add_end_of_cb_packets)(struct hl_device *hdev,
 					void *kernel_address, u32 len,
 					u32 original_len,
@@ -3670,8 +3674,13 @@ void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t
 					dma_addr_t *dma_handle, const char *caller);
 void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
 					const char *caller);
-int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
-void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
+int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
+				enum dma_data_direction dir, const char *caller);
+void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
+					enum dma_data_direction dir, const char *caller);
+int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt,
+				enum dma_data_direction dir);
+void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
 				enum dma_data_direction dir);
 int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
 	enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar);
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index c465dc828b0c..ba59e921236e 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -244,7 +244,7 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
 
 	*p_userptr = userptr;
 
-	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
+	rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
 	if (rc) {
 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
 		goto dma_map_err;
@@ -2445,7 +2445,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
 	hl_debugfs_remove_userptr(hdev, userptr);
 
 	if (userptr->dma_mapped)
-		hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
+		hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
 
 	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
 	kvfree(userptr->pages);
diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index 29fb71c8b47f..31e04af46876 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -4908,7 +4908,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
 
 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
 
-	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
+	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
 	if (rc) {
 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
 		goto unpin_memory;
@@ -9144,9 +9144,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.asic_dma_pool_free = gaudi_dma_pool_free,
 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
-	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
+	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
 	.cs_parser = gaudi_cs_parser,
-	.asic_dma_map_sgtable = hl_dma_map_sgtable,
+	.dma_map_sgtable = hl_asic_dma_map_sgtable,
 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
 	.update_eq_ci = gaudi_update_eq_ci,
 	.context_switch = gaudi_context_switch,
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index be4a745f8548..819660c684cf 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -11497,9 +11497,9 @@ static const struct hl_asic_funcs gaudi2_funcs = {
 	.asic_dma_pool_free = gaudi2_dma_pool_free,
 	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
 	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
-	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
+	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
 	.cs_parser = gaudi2_cs_parser,
-	.asic_dma_map_sgtable = hl_dma_map_sgtable,
+	.dma_map_sgtable = hl_asic_dma_map_sgtable,
 	.add_end_of_cb_packets = NULL,
 	.update_eq_ci = gaudi2_update_eq_ci,
 	.context_switch = gaudi2_context_switch,
diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
index 024ccf2e159b..1322cb330c57 100644
--- a/drivers/accel/habanalabs/goya/goya.c
+++ b/drivers/accel/habanalabs/goya/goya.c
@@ -3358,7 +3358,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
 
 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
 
-	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
+	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
 	if (rc) {
 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
 		goto unpin_memory;
@@ -5465,9 +5465,9 @@ static const struct hl_asic_funcs goya_funcs = {
 	.asic_dma_pool_free = goya_dma_pool_free,
 	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
 	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
-	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
+	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
 	.cs_parser = goya_cs_parser,
-	.asic_dma_map_sgtable = hl_dma_map_sgtable,
+	.dma_map_sgtable = hl_asic_dma_map_sgtable,
 	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
 	.update_eq_ci = goya_update_eq_ci,
 	.context_switch = goya_context_switch,

From 7f1cd6fdd5872160da05098a84a94dee5e709e54 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 21 Sep 2023 14:52:01 +0300
Subject: [PATCH 64/77] accel/habanalabs: minor cosmetics update to cpucp_if.h

- Update copyright years
- Align comments

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 include/linux/habanalabs/cpucp_if.h | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
index 84d74c4ee4d3..86ea7c63a0d2 100644
--- a/include/linux/habanalabs/cpucp_if.h
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0
  *
- * Copyright 2020-2022 HabanaLabs, Ltd.
+ * Copyright 2020-2023 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -668,18 +668,15 @@ enum pq_init_status {
  *       Obsolete.
  *
  * CPUCP_PACKET_GENERIC_PASSTHROUGH -
- *      Generic opcode for all firmware info that is only passed to host
- *      through the LKD, without getting parsed there.
+ *       Generic opcode for all firmware info that is only passed to host
+ *       through the LKD, without getting parsed there.
  *
  * CPUCP_PACKET_ACTIVE_STATUS_SET -
  *       LKD sends FW indication whether device is free or in use, this indication is reported
  *       also to the BMC.
  *
- * CPUCP_PACKET_REGISTER_INTERRUPTS -
- *       Packet to register interrupts indicating LKD is ready to receive events from FW.
- *
  * CPUCP_PACKET_SOFT_RESET -
- *      Packet to perform soft-reset.
+ *       Packet to perform soft-reset.
  *
  * CPUCP_PACKET_INTS_REGISTER -
  *       Packet to inform FW that queues have been established and LKD is ready to receive
@@ -750,9 +747,9 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_RESERVED11,		/* not used */
 	CPUCP_PACKET_RESERVED12,		/* internal */
 	CPUCP_PACKET_RESERVED13,                /* internal */
-	CPUCP_PACKET_SOFT_RESET,                /* internal */
-	CPUCP_PACKET_INTS_REGISTER,             /* internal */
-	CPUCP_PACKET_ID_MAX                     /* must be last */
+	CPUCP_PACKET_SOFT_RESET,		/* internal */
+	CPUCP_PACKET_INTS_REGISTER,		/* internal */
+	CPUCP_PACKET_ID_MAX			/* must be last */
 };
 
 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5

From a43557dcd429a310e0011737fe12bb8ebf90fcc5 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 21 Sep 2023 14:52:39 +0300
Subject: [PATCH 65/77] accel/habanalabs: minor cosmetics update to trace file

- Update copyright years
- Add missing newline at end of file

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 include/trace/events/habanalabs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/trace/events/habanalabs.h b/include/trace/events/habanalabs.h
index 0d3667399d01..a78d21fa9f29 100644
--- a/include/trace/events/habanalabs.h
+++ b/include/trace/events/habanalabs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0
  *
- * Copyright 2016-2021 HabanaLabs, Ltd.
+ * Copyright 2022-2023 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */

From 26f5b6c364382803e4c29743bf8e9fd0de161190 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 21 Sep 2023 14:54:01 +0300
Subject: [PATCH 66/77] accel/habanalabs: change Greco to Gaudi2

Greco was not upstreamed so no point of mentioning it here.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 include/uapi/drm/habanalabs_accel.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h
index dfe47db24ae6..347c7b62e60e 100644
--- a/include/uapi/drm/habanalabs_accel.h
+++ b/include/uapi/drm/habanalabs_accel.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
  *
- * Copyright 2016-2022 HabanaLabs, Ltd.
+ * Copyright 2016-2023 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -885,11 +885,11 @@ enum hl_server_type {
  * @dram_enabled: Whether the DRAM is enabled.
  * @security_enabled: Whether security is enabled on device.
  * @mme_master_slave_mode: Indicate whether the MME is working in master/slave
- *                         configuration. Relevant for Greco and later.
+ *                         configuration. Relevant for Gaudi2 and later.
  * @cpucp_version: The CPUCP f/w version.
  * @card_name: The card name as passed by the f/w.
  * @tpc_enabled_mask_ext: Bit-mask that represents which TPCs are enabled.
- *                        Relevant for Greco and later.
+ *                        Relevant for Gaudi2 and later.
  * @dram_page_size: The DRAM physical page size.
  * @edma_enabled_mask: Bit-mask that represents which EDMAs are enabled.
  *                     Relevant for Gaudi2 and later.
@@ -1425,7 +1425,7 @@ union hl_cb_args {
  *
  * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB:
  *      Indicates if the CB was allocated and mapped by userspace
- *      (relevant to greco and above). User allocated CB is a command buffer,
+ *      (relevant to Gaudi2 and later). User allocated CB is a command buffer,
  *      allocated by the user, via malloc (or similar). After allocating the
  *      CB, the user invokes - “memory ioctl” to map the user memory into a
  *      device virtual address. The user provides this address via the
@@ -1450,7 +1450,7 @@ struct hl_cs_chunk {
 		 * a DRAM address of the internal CB. In Gaudi, this might also
 		 * represent a mapped host address of the CB.
 		 *
-		 * Greco onwards:
+		 * Gaudi2 onwards:
 		 * For H/W queue, this represents either a Handle of CB on the
 		 * Host, or an SRAM, a DRAM, or a mapped host address of the CB.
 		 *
@@ -2227,7 +2227,7 @@ struct hl_debug_args {
  * internal. The driver will get completion notifications from the device only
  * on JOBS which are enqueued in the external queues.
  *
- * Greco onwards:
+ * Gaudi2 onwards:
  * There is a single type of queue for all types of engines, either DMA engines
  * for transfers from/to the host or inside the device, or compute engines.
  * The driver will get completion notifications from the device for all queues.

From b5305d23aae50d725622ae058466d4603621d6d9 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 21 Sep 2023 15:06:58 +0300
Subject: [PATCH 67/77] accel/habanalabs/gaudi: remove unused structure
 definition

struct gaudi_nic_status is not used anywhere in the code.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 .../habanalabs/include/gaudi/gaudi_fw_if.h    | 32 -------------------
 1 file changed, 32 deletions(-)

diff --git a/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h b/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h
index 2dba02757d37..a2547f306750 100644
--- a/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h
+++ b/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h
@@ -44,38 +44,6 @@ struct eq_nic_sei_event {
 	__u8 pad[6];
 };
 
-/*
- * struct gaudi_nic_status - describes the status of a NIC port.
- * @port: NIC port index.
- * @bad_format_cnt: e.g. CRC.
- * @responder_out_of_sequence_psn_cnt: e.g NAK.
- * @high_ber_reinit_cnt: link reinit due to high BER.
- * @correctable_err_cnt: e.g. bit-flip.
- * @uncorrectable_err_cnt: e.g. MAC errors.
- * @retraining_cnt: re-training counter.
- * @up: is port up.
- * @pcs_link: has PCS link.
- * @phy_ready: is PHY ready.
- * @auto_neg: is Autoneg enabled.
- * @timeout_retransmission_cnt: timeout retransmission events
- * @high_ber_cnt: high ber events
- */
-struct gaudi_nic_status {
-	__u32 port;
-	__u32 bad_format_cnt;
-	__u32 responder_out_of_sequence_psn_cnt;
-	__u32 high_ber_reinit;
-	__u32 correctable_err_cnt;
-	__u32 uncorrectable_err_cnt;
-	__u32 retraining_cnt;
-	__u8 up;
-	__u8 pcs_link;
-	__u8 phy_ready;
-	__u8 auto_neg;
-	__u32 timeout_retransmission_cnt;
-	__u32 high_ber_cnt;
-};
-
 struct gaudi_cold_rst_data {
 	union {
 		struct {

From e5873f6b914918797219ca7515afd4bbfa5b8ec4 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 21 Sep 2023 15:10:36 +0300
Subject: [PATCH 68/77] accel/habanalabs: remove unused field

flags in struct wait_interrupt_data is not used anywhere so remove it.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 drivers/accel/habanalabs/common/command_submission.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 751d2c7d3fb8..e5ca490b7fca 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -47,7 +47,6 @@ struct wait_interrupt_data {
 	u64 cq_offset;
 	u64 target_value;
 	u64 intr_timeout_us;
-	unsigned long flags;
 };
 
 static void job_wq_completion(struct work_struct *work);

From 6fc69ca84af7cb78fca05d4b44b55098237d8d9e Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 21 Sep 2023 15:25:26 +0300
Subject: [PATCH 69/77] accel/habanalabs: print device name when it is removed

Notifies the user which device was removed. It is important in
a server with multiple devices.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 drivers/accel/habanalabs/common/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index b1d330cd2537..9711e8fc979d 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -2420,7 +2420,7 @@ void hl_device_fini(struct hl_device *hdev)
 	u64 reset_sec;
 	int i, rc;
 
-	dev_info(hdev->dev, "Removing device\n");
+	dev_info(hdev->dev, "Removing device %s\n", dev_name(&(hdev)->pdev->dev));
 
 	hdev->device_fini_pending = 1;
 	flush_delayed_work(&hdev->device_reset_work.reset_work);

From 87c60e23f26bac0a9d6dbcd020ffce483997990c Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 21 Sep 2023 16:34:28 +0300
Subject: [PATCH 70/77] accel/habanalabs: remove leftover code

This code was added as part of a bigger feature which was never
upstreamed, so remove this code.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 drivers/accel/habanalabs/common/command_submission.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index e5ca490b7fca..4f7b70d9754c 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -1751,16 +1751,11 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 		/* Need to wait for restore completion before execution phase */
 		if (num_chunks) {
 			enum hl_cs_wait_status status;
-wait_again:
+
 			ret = _hl_cs_wait_ioctl(hdev, ctx,
 					jiffies_to_usecs(hdev->timeout_jiffies),
 					*cs_seq, &status, NULL);
 			if (ret) {
-				if (ret == -ERESTARTSYS) {
-					usleep_range(100, 200);
-					goto wait_again;
-				}
-
 				dev_err(hdev->dev,
 					"Restore CS for context %d failed to complete %d\n",
 					ctx->asid, ret);

From 4355f2c32274ed918f00c9dbecb070f419b1d3cf Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 21 Sep 2023 16:36:33 +0300
Subject: [PATCH 71/77] accel/habanalabs/gaudi: remove define used for
 simulator

We don't support simulator in upstream.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 drivers/accel/habanalabs/common/habanalabs.h | 2 --
 drivers/accel/habanalabs/gaudi/gaudi.c       | 3 +--
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index b1b1c51aa5b1..a8ccc04e7f92 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -85,8 +85,6 @@ struct hl_fpriv;
 
 #define HL_PCI_ELBI_TIMEOUT_MSEC	10 /* 10ms */
 
-#define HL_SIM_MAX_TIMEOUT_US		100000000 /* 100s */
-
 #define HL_INVALID_QUEUE		UINT_MAX
 
 #define HL_COMMON_USER_CQ_INTERRUPT_ID	0xFFF
diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index 31e04af46876..53292d4c15c8 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -4623,8 +4623,7 @@ static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
 static int gaudi_scrub_device_mem(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
-			min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
+	u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
 	u64 addr, size, val = hdev->memory_scrub_val;
 	ktime_t timeout;
 	int rc = 0;

From 1630d14f8db0f2e41a5e1cd9cc90b41e899d26df Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 21 Sep 2023 16:43:36 +0300
Subject: [PATCH 72/77] accel/habanalabs: minor cosmetic update to habanalabs.h

- Update copyright years
- Align fields in struct hl_userptr
- Fix comments

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 drivers/accel/habanalabs/common/habanalabs.h | 31 ++++++++++----------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index a8ccc04e7f92..2a3acdbf9171 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0
  *
- * Copyright 2016-2022 HabanaLabs, Ltd.
+ * Copyright 2016-2023 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -553,8 +553,7 @@ struct hl_hints_range {
  *              allocated with huge pages.
  * @hints_dram_reserved_va_range: dram hint addresses reserved range.
  * @hints_host_reserved_va_range: host hint addresses reserved range.
- * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved
- *                                      range.
+ * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved range.
  * @sram_base_address: SRAM physical start address.
  * @sram_end_address: SRAM physical end address.
  * @sram_user_base_address - SRAM physical start address for user access.
@@ -593,7 +592,7 @@ struct hl_hints_range {
  * @mmu_pte_size: PTE size in MMU page tables.
  * @mmu_hop_table_size: MMU hop table size.
  * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
- * @dram_page_size: page size for MMU DRAM allocation.
+ * @dram_page_size: The DRAM physical page size.
  * @cfg_size: configuration space size on SRAM.
  * @sram_size: total size of SRAM.
  * @max_asid: maximum number of open contexts (ASIDs).
@@ -695,7 +694,7 @@ struct hl_hints_range {
  * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
  * @set_max_power_on_device_init: true if need to set max power in F/W on device init.
  * @supports_user_set_page_size: true if user can set the allocation page size.
- * @dma_mask: the dma mask to be set for this device
+ * @dma_mask: the dma mask to be set for this device.
  * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
  * @supports_engine_modes: true if changing engines/engine_cores modes is supported.
  * @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw.
@@ -1959,17 +1958,17 @@ struct hl_ctx_mgr {
  * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
  */
 struct hl_userptr {
-	enum vm_type		vm_type; /* must be first */
-	struct list_head	job_node;
-	struct page		**pages;
-	unsigned int		npages;
-	struct sg_table		*sgt;
-	enum dma_data_direction dir;
-	struct list_head	debugfs_list;
-	pid_t			pid;
-	u64			addr;
-	u64			size;
-	u8			dma_mapped;
+	enum vm_type			vm_type; /* must be first */
+	struct list_head		job_node;
+	struct page			**pages;
+	unsigned int			npages;
+	struct sg_table			*sgt;
+	enum dma_data_direction		dir;
+	struct list_head		debugfs_list;
+	pid_t				pid;
+	u64				addr;
+	u64				size;
+	u8				dma_mapped;
 };
 
 /**

From c1805bf36a2786db7a745ef62cbfffa2015a0476 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 27 Sep 2023 09:21:53 +0200
Subject: [PATCH 73/77] accel/habanalabs: add missing debugfs function stubs

Two function stubs were removed in an earlier commit but are now needed
again:

drivers/accel/habanalabs/common/device.c: In function 'hl_device_init':
drivers/accel/habanalabs/common/device.c:2231:14: error: implicit declaration of function 'hl_debugfs_device_init'; did you mean 'drm_debugfs_dev_init'? [-Werror=implicit-function-declaration]
 2231 |         rc = hl_debugfs_device_init(hdev);
drivers/accel/habanalabs/common/device.c:2367:9: error: implicit declaration of function 'hl_debugfs_device_fini'; did you mean 'hl_debugfs_remove_file'? [-Werror=implicit-function-declaration]
 2367 |         hl_debugfs_device_fini(hdev);
      |         ^~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/habanalabs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 2a3acdbf9171..1655c101c705 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -4041,6 +4041,15 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
 
 #else
 
+static inline int hl_debugfs_device_init(struct hl_device *hdev)
+{
+	return 0;
+}
+
+static inline void hl_debugfs_device_fini(struct hl_device *hdev)
+{
+}
+
 static inline void hl_debugfs_add_device(struct hl_device *hdev)
 {
 }

From 2ba0236f5ba554425d7d229c71236843d7b06ee7 Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dhirschfeld@habana.ai>
Date: Tue, 22 Aug 2023 16:48:14 +0300
Subject: [PATCH 74/77] accel/habanalabs: remove wrong doc for
 init_phys_pg_pack_from_userptr

The function does not pin the pages so remove that from the inline doc.

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/memory.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index ba59e921236e..0b8689fe0b64 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -832,7 +832,6 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
  *                      physical pages
  *
  * This function does the following:
- * - Pin the physical pages related to the given virtual block.
  * - Create a physical page pack from the physical pages related to the given
  *   virtual block.
  */

From 84190b92cc1fd6cdbcb90b65dedeb32fb49336e7 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Wed, 20 Sep 2023 12:22:02 +0300
Subject: [PATCH 75/77] accel/habanalabs: fix bug in decoder wait for cs
 completion

The decoder interrupts are handled in the interrupt context
same as all user interrupts.
In such case, the wait list should be protected by
spin_lock_irqsave in order to avoid deadlock that might happen
with the user submission flow.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../accel/habanalabs/common/command_submission.c   | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 4f7b70d9754c..3aa6eeef443b 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -3526,7 +3526,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 				u64 *timestamp)
 {
 	struct hl_user_pending_interrupt *pend;
-	unsigned long timeout;
+	unsigned long timeout, flags;
 	u64 completion_value;
 	long completion_rc;
 	int rc = 0;
@@ -3546,9 +3546,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 	/* Add pending user interrupt to relevant list for the interrupt
 	 * handler to monitor
 	 */
-	spin_lock(&interrupt->wait_list_lock);
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 	list_add_tail(&pend->list_node, &interrupt->wait_list_head);
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 	/* We check for completion value as interrupt could have been received
 	 * before we added the node to the wait list
@@ -3579,14 +3579,14 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 	 * If comparison fails, keep waiting until timeout expires
 	 */
 	if (completion_rc > 0) {
-		spin_lock(&interrupt->wait_list_lock);
+		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 		/* reinit_completion must be called before we check for user
 		 * completion value, otherwise, if interrupt is received after
 		 * the comparison and before the next wait_for_completion,
 		 * we will reach timeout and fail
 		 */
 		reinit_completion(&pend->fence.completion);
-		spin_unlock(&interrupt->wait_list_lock);
+		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
 			dev_err(hdev->dev, "Failed to copy completion value from user\n");
@@ -3623,9 +3623,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 	}
 
 remove_pending_user_interrupt:
-	spin_lock(&interrupt->wait_list_lock);
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 	list_del(&pend->list_node);
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 	*timestamp = ktime_to_ns(pend->fence.timestamp);
 

From 0426e03126fba3e3e8c549504283f7213d31831f Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Tue, 19 Sep 2023 10:32:23 +0300
Subject: [PATCH 76/77] accel/habanalabs/gaudi2: perform hard-reset upon PCIe
 AXI drain event

Non-completed transactions from PCIe towards the device are handled by
the AXI drain mechanism. This handling is in the PCIe level, but the
transactions are still there in the device consuming some queues
entries, and therefore the device must be reset.
Modify to perform hard-reset upon PCIe AXI drain events.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
index 57e661771b6c..b2dbe1f64430 100644
--- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
+++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
@@ -1293,7 +1293,7 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
 		 .name = "" },
 	{ .fc_id = 631, .cpu_id = 128, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
 		 .name = "PCIE_P2P_MSIX" },
-	{ .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+	{ .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
 		 .name = "PCIE_DRAIN_COMPLETE" },
 	{ .fc_id = 633, .cpu_id = 130, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
 		 .name = "TPC0_BMON_SPMU" },

From 4db74c0fdeb8138f6438d42a015c5dcdb2e6874c Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 28 Sep 2023 16:12:30 +0300
Subject: [PATCH 77/77] accel/habanalabs/gaudi2: fix spmu mask creation

event_types_num received from the user can be 0. In that case, the
event_mask should be 0.

In addition, to create a correct mask we need to match the number
of event types to the bit location such that bit 0 represents a single
event type, bit 1 represents 2 types and so on.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
---
 drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
index 14a855cdc96b..2423620ff358 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
@@ -2500,7 +2500,8 @@ static int gaudi2_config_spmu(struct hl_device *hdev, struct hl_debug_params *pa
 		 * set enabled events mask based on input->event_types_num
 		 */
 		event_mask = 0x80000000;
-		event_mask |= GENMASK(input->event_types_num, 0);
+		if (input->event_types_num)
+			event_mask |= GENMASK(input->event_types_num - 1, 0);
 
 		WREG32(base_reg + mmSPMU_PMCNTENSET_EL0_OFFSET, event_mask);
 	} else {