Merge tag 'drm-habanalabs-next-2023-03-20' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into drm-next

This tag contains habanalabs driver and accel changes for v6.4:

- uAPI changes:

  - Add opcodes to the CS ioctl to allow the user to stall/resume specific
    engines inside Gaudi2. This allows the user to perform power
    testing/measurements when training different topologies (see the
    stall/resume sketch after this list).

  - Expose in the INFO ioctl the amount of device memory that the driver
    and f/w reserve for themselves.

  - Expose in the INFO ioctl a bit-mask of the available rotator engines
    in Gaudi2. This aligns with the other engines that are already exposed.

  - Expose in the INFO ioctl the address of the f/w register that should
    be used to trigger interrupts from within the user's code running on the
    compute engines.

  - Add a critical-event bit to the eventfd bitmask so the user will know that
    the received event is critical and that a reset will occur (see the
    eventfd sketch after this list).

  - Expose in the INFO ioctl two new opcodes to fetch information on h/w and
    f/w events. The recorded events are those that were reported through the
    eventfd.

- New features and improvements:

  - Add a dedicated MSI-X interrupt ID in the device for notifying about an
    unexpected user-related event in Gaudi2. Handle it in the driver by
    reporting this event.

  - Allow the user to fetch the current device memory usage even when the
    device is undergoing compute-reset (a reset type that only clears the
    compute engines).

  - Enable the graceful reset mechanism for compute-reset. This gives the
    user a few seconds before the device is reset. During that time, the user
    can, for example, perform certain device operations (dump data for debug)
    or close the device in an orderly fashion (see the graceful-reset sketch
    after this list).

  - Align the decoder with the rest of the engines with regard to notifying
    the user about interrupts and to performing a graceful reset when needed
    (instead of an immediate reset).

  - Add support for the assert interrupt from the TPC engine.

  - Get the reset type that needs to be performed per event from the
    auto-generated irq_map array.

  - Print the specific reason why a device is still in use when notifying
    the user about it (after the user has closed the device's FD).

  - Move to a threaded IRQ when handling workload-completion interrupts (see
    the threaded-IRQ sketch after this list).

- Firmware related fixes:

  - Fix the RAZWI event handler to match the newest f/w version.

  - Read the error cause register in DMA core events because the f/w doesn't
    do that.

  - Increase the maximum time to wait for completion of a Gaudi2 reset due to
    a f/w bug.

  - Align to the latest firmware specs.

- Enforce the release order of the compute device and dma-buf, i.e. increment
  the device file refcount for any dma-buf that was exported for that device.
  This makes sure the compute device release function won't be called until
  the user closes all the FDs of the relevant dma-bufs. Without this change,
  closing the device's FD before/without closing the dma-buf's FD would always
  lead to a hard-reset of the device (see the dma-buf refcount sketch after
  this list).

- Fix a link in the drm documentation to correctly point to the accel section.

- Compilation warnings cleanups

- Misc bug fixes and code cleanups
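
Below are a few hedged sketches illustrating the items marked above. First, a
minimal user-space sketch of the new engines command in the CS ioctl
(stall/resume). The HL_CS_FLAGS_ENGINES_COMMAND flag and the in.engines,
in.num_engines and in.engine_command fields appear in the diff below; the
HL_ENGINE_STALL command name and the uAPI header path are assumptions.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/habanalabs_accel.h>               /* assumed uAPI header location */

/* Ask the driver to stall the given engines - a sketch, not the official API */
static int stall_engines(int fd, const uint32_t *engine_ids, uint32_t count)
{
        union hl_cs_args args;

        memset(&args, 0, sizeof(args));
        args.in.cs_flags = HL_CS_FLAGS_ENGINES_COMMAND;
        args.in.engines = (uint64_t)(uintptr_t)engine_ids; /* array of engine IDs */
        args.in.num_engines = count;
        args.in.engine_command = HL_ENGINE_STALL;          /* assumed command name */

        return ioctl(fd, HL_IOCTL_CS, &args);
}

Resuming the same engines would use the corresponding resume command with the
same flow.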
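
For the critical-event bit and the new h/w and f/w event-info opcodes, a
sketch of the user-side flow: wait on the registered eventfd, fetch the events
bitmask through the INFO ioctl, and check the critical bits. The
HL_NOTIFIER_EVENT_CRITICL_* names appear in the diff below; HL_INFO_GET_EVENTS
and the exact hl_info_args layout are assumptions.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/habanalabs_accel.h>               /* assumed uAPI header location */

static void handle_device_events(int dev_fd, int event_fd)
{
        struct hl_info_args args;
        uint64_t counter, events = 0;

        /* Block until the driver signals the eventfd registered earlier */
        if (read(event_fd, &counter, sizeof(counter)) != sizeof(counter))
                return;

        /* Fetch the accumulated events bitmask from the driver */
        memset(&args, 0, sizeof(args));
        args.op = HL_INFO_GET_EVENTS;           /* assumed opcode name */
        args.return_pointer = (uint64_t)(uintptr_t)&events;
        args.return_size = sizeof(events);
        if (ioctl(dev_fd, HL_IOCTL_INFO, &args))
                return;

        if (events & (HL_NOTIFIER_EVENT_CRITICL_HW_ERR | HL_NOTIFIER_EVENT_CRITICL_FW_ERR))
                fprintf(stderr, "critical device event reported, a reset will follow\n");
}

Once such an event is seen, the two new INFO opcodes mentioned above can be
used to fetch the recorded h/w or f/w event details.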
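
The graceful-reset behaviour maps to a standard delayed-work pattern on the
kernel side: schedule a release watchdog and hard-reset only if the grace
period expires without the user releasing the device. This is a generic sketch
of that pattern with illustrative names; the driver's actual flow
(hl_device_cond_reset and the device-release watchdog work) is in the diff
below.

#include <linux/workqueue.h>
#include <linux/jiffies.h>

struct my_dev {                                 /* illustrative structure */
        struct delayed_work release_watchdog;   /* INIT_DELAYED_WORK() at init time */
        unsigned int grace_sec;
};

static void release_watchdog_fn(struct work_struct *work)
{
        struct my_dev *dev = container_of(to_delayed_work(work),
                                          struct my_dev, release_watchdog);

        /* Grace period expired without a device release: hard-reset here */
        (void)dev;
}

/* Called instead of resetting immediately, e.g. on a user-recoverable error */
static void schedule_graceful_reset(struct my_dev *dev)
{
        schedule_delayed_work(&dev->release_watchdog,
                              msecs_to_jiffies(dev->grace_sec * 1000));
}

/* Called when the user releases the device within the grace period */
static void device_released_in_time(struct my_dev *dev)
{
        cancel_delayed_work_sync(&dev->release_watchdog);
}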
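
The move to a threaded IRQ for workload-completion interrupts follows the
usual request_threaded_irq() pattern: the hard handler does the minimum and
wakes an IRQ thread that performs the actual completion processing. A generic
sketch with illustrative names; one visible consequence in the diff below is
that the wait-list lock can now be taken with plain spin_lock() instead of
spin_lock_irqsave().

#include <linux/interrupt.h>
#include <linux/list.h>

struct my_irq_ctx {                             /* illustrative structure */
        struct list_head wait_list_head;
};

static irqreturn_t completion_irq_handler(int irq, void *arg)
{
        /* Hard-IRQ context: do the minimum and defer the rest to the thread */
        return IRQ_WAKE_THREAD;
}

static irqreturn_t completion_irq_thread(int irq, void *arg)
{
        struct my_irq_ctx *ctx = arg;

        /* Thread context: walk the wait list, complete fences, record timestamps */
        (void)ctx;
        return IRQ_HANDLED;
}

static int register_completion_irq(struct my_irq_ctx *ctx, int irq)
{
        return request_threaded_irq(irq, completion_irq_handler,
                                    completion_irq_thread, 0,
                                    "workload-completion", ctx);
}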
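
The dma-buf release-order enforcement boils down to taking a reference on the
device file when a dma-buf is exported and dropping it in the dma-buf's
release callback. A sketch of that idea with illustrative names; the driver's
real implementation also tracks a dmabuf_export_cnt, as seen in the diff
below.

#include <linux/dma-buf.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/slab.h>

struct my_dmabuf_priv {                         /* illustrative structure */
        struct file *dev_file;                  /* the compute device's open file */
};

static void my_dmabuf_release(struct dma_buf *dmabuf)
{
        struct my_dmabuf_priv *priv = dmabuf->priv;

        /* Last dma-buf FD is gone: allow the device file release to proceed */
        fput(priv->dev_file);
        kfree(priv);
}

static const struct dma_buf_ops my_dmabuf_ops = {
        /* .map_dma_buf / .unmap_dma_buf etc. omitted in this sketch */
        .release = my_dmabuf_release,
};

/* On export: pin the device file so its release waits for all dma-buf FDs */
static void my_dmabuf_pin_device(struct my_dmabuf_priv *priv, struct file *dev_file)
{
        get_file(dev_file);
        priv->dev_file = dev_file;
}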

Signed-off-by: Dave Airlie <airlied@redhat.com>

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCgAdFiEE7TEboABC71LctBLFZR1NuKta54AFAmQYfcAACgkQZR1NuKta
# 54DB4Af/SuiHZkVXwr+yHPv9El726rz9ZQD7mQtzNmehWGonwAvz15yqocNMUSbF
# JbqE/vrZjvbXrP1Uv5UrlRVdnFHSPV18VnHU4BMS/WOm19SsR6vZ0QOXOoa6/AUb
# w+kF3D//DbFI4/mTGfpH5/pzwu51ti8aVktosPFlHIa8iI8CB4/4IV+ivQ8UW4oK
# HyDRkIvHdRmER7vGOfhwhsr4zdqSlJBYrv3C3Z1dkSYBPW/5ICbiM1UlKycwdYKI
# cajQBSdUQwUCWnI+i8RmSy3kjNO6OE4XRUvTv89F2bQeyK/1rJLG2m2xZR/Ml/o5
# 7Cgvbn0hWZyeqe7OObYiBlSOBSehCA==
# =wclm
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 21 Mar 2023 01:37:36 AEST
# gpg:                using RSA key ED311BA00042EF52DCB412C5651D4DB8AB5AE780
# gpg: Can't check signature: No public key
From: Oded Gabbay <ogabbay@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230320154026.GA766126@ogabbay-vm-u20.habana-labs.com
commit d36d68fd19
Dave Airlie, 2023-03-22 10:35:45 +10:00
30 changed files with 4699 additions and 3781 deletions

View File

@ -14,10 +14,10 @@
#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \
HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
#define MAX_TS_ITER_NUM 10
#define MAX_TS_ITER_NUM 100
/**
* enum hl_cs_wait_status - cs wait status
@ -657,7 +657,7 @@ static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
/*
* we get refcount upon reservation of signals or signal/wait cs for the
* hw_sob object, and need to put it when the first staged cs
* (which cotains the encaps signals) or cs signal/wait is completed.
* (which contains the encaps signals) or cs signal/wait is completed.
*/
if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
@ -1082,9 +1082,8 @@ static void
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
{
struct hl_user_pending_interrupt *pend, *temp;
unsigned long flags;
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
spin_lock(&interrupt->wait_list_lock);
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
if (pend->ts_reg_info.buf) {
list_del(&pend->wait_list_node);
@ -1095,7 +1094,7 @@ wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
complete_all(&pend->fence.completion);
}
}
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
spin_unlock(&interrupt->wait_list_lock);
}
void hl_release_pending_user_interrupts(struct hl_device *hdev)
@ -1168,6 +1167,22 @@ static void cs_completion(struct work_struct *work)
hl_complete_job(hdev, job);
}
u32 hl_get_active_cs_num(struct hl_device *hdev)
{
u32 active_cs_num = 0;
struct hl_cs *cs;
spin_lock(&hdev->cs_mirror_lock);
list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
if (!cs->completed)
active_cs_num++;
spin_unlock(&hdev->cs_mirror_lock);
return active_cs_num;
}
static int validate_queue_index(struct hl_device *hdev,
struct hl_cs_chunk *chunk,
enum hl_queue_type *queue_type,
@ -1304,6 +1319,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
return CS_UNRESERVE_SIGNALS;
else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
return CS_TYPE_ENGINE_CORE;
else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND)
return CS_TYPE_ENGINES;
else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
return CS_TYPE_FLUSH_PCI_HBW_WRITES;
else
@ -2429,10 +2446,13 @@ out:
static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
u32 num_engine_cores, u32 core_command)
{
int rc;
struct hl_device *hdev = hpriv->hdev;
void __user *engine_cores_arr;
u32 *cores;
int rc;
if (!hdev->asic_prop.supports_engine_modes)
return -EPERM;
if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
@ -2461,6 +2481,48 @@ static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
return rc;
}
static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr,
u32 num_engines, enum hl_engine_command command)
{
struct hl_device *hdev = hpriv->hdev;
u32 *engines, max_num_of_engines;
void __user *engines_arr;
int rc;
if (!hdev->asic_prop.supports_engine_modes)
return -EPERM;
if (command >= HL_ENGINE_COMMAND_MAX) {
dev_err(hdev->dev, "Engine command is invalid\n");
return -EINVAL;
}
max_num_of_engines = hdev->asic_prop.max_num_of_engines;
if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT)
max_num_of_engines = hdev->asic_prop.num_engine_cores;
if (!num_engines || num_engines > max_num_of_engines) {
dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines);
return -EINVAL;
}
engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr;
engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL);
if (!engines)
return -ENOMEM;
if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) {
dev_err(hdev->dev, "Failed to copy engine-ids array from user\n");
kfree(engines);
return -EFAULT;
}
rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command);
kfree(engines);
return rc;
}
static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
{
struct hl_device *hdev = hpriv->hdev;
@ -2532,6 +2594,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
args->in.num_engine_cores, args->in.core_command);
break;
case CS_TYPE_ENGINES:
rc = cs_ioctl_engines(hpriv, args->in.engines,
args->in.num_engines, args->in.engine_command);
break;
case CS_TYPE_FLUSH_PCI_HBW_WRITES:
rc = cs_ioctl_flush_pci_hbw_writes(hpriv);
break;
@ -3143,8 +3209,9 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
struct hl_user_pending_interrupt *cb_last =
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
unsigned long flags, iter_counter = 0;
unsigned long iter_counter = 0;
u64 current_cq_counter;
ktime_t timestamp;
/* Validate ts_offset not exceeding last max */
if (requested_offset_record >= cb_last) {
@ -3153,8 +3220,10 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
return -EINVAL;
}
timestamp = ktime_get();
start_over:
spin_lock_irqsave(wait_list_lock, flags);
spin_lock(wait_list_lock);
/* Unregister only if we didn't reach the target value
* since in this case there will be no handling in irq context
@ -3165,7 +3234,7 @@ start_over:
current_cq_counter = *requested_offset_record->cq_kernel_addr;
if (current_cq_counter < requested_offset_record->cq_target_value) {
list_del(&requested_offset_record->wait_list_node);
spin_unlock_irqrestore(wait_list_lock, flags);
spin_unlock(wait_list_lock);
hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
@ -3176,13 +3245,14 @@ start_over:
dev_dbg(buf->mmg->dev,
"ts node in middle of irq handling\n");
/* irq handling in the middle give it time to finish */
spin_unlock_irqrestore(wait_list_lock, flags);
usleep_range(1, 10);
/* irq thread handling in the middle give it time to finish */
spin_unlock(wait_list_lock);
usleep_range(100, 1000);
if (++iter_counter == MAX_TS_ITER_NUM) {
dev_err(buf->mmg->dev,
"handling registration interrupt took too long!!\n");
return -EINVAL;
"Timestamp offset processing reached timeout of %lld ms\n",
ktime_ms_delta(ktime_get(), timestamp));
return -EAGAIN;
}
goto start_over;
@ -3197,7 +3267,7 @@ start_over:
(u64 *) cq_cb->kernel_address + cq_offset;
requested_offset_record->cq_target_value = target_value;
spin_unlock_irqrestore(wait_list_lock, flags);
spin_unlock(wait_list_lock);
}
*pend = requested_offset_record;
@ -3217,7 +3287,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct hl_user_pending_interrupt *pend;
struct hl_mmap_mem_buf *buf;
struct hl_cb *cq_cb;
unsigned long timeout, flags;
unsigned long timeout;
long completion_rc;
int rc = 0;
@ -3264,7 +3334,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
pend->cq_target_value = target_value;
}
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
spin_lock(&interrupt->wait_list_lock);
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
@ -3272,7 +3342,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
if (*pend->cq_kernel_addr >= target_value) {
if (register_ts_record)
pend->ts_reg_info.in_use = 0;
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
spin_unlock(&interrupt->wait_list_lock);
*status = HL_WAIT_CS_STATUS_COMPLETED;
@ -3284,7 +3354,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
goto set_timestamp;
}
} else if (!timeout_us) {
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
spin_unlock(&interrupt->wait_list_lock);
*status = HL_WAIT_CS_STATUS_BUSY;
pend->fence.timestamp = ktime_get();
goto set_timestamp;
@ -3309,7 +3379,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
pend->ts_reg_info.in_use = 1;
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
spin_unlock(&interrupt->wait_list_lock);
if (register_ts_record) {
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
@ -3353,9 +3423,9 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
* for ts record, the node will be deleted in the irq handler after
* we reach the target value.
*/
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
spin_lock(&interrupt->wait_list_lock);
list_del(&pend->wait_list_node);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
spin_unlock(&interrupt->wait_list_lock);
set_timestamp:
*timestamp = ktime_to_ns(pend->fence.timestamp);
@ -3383,7 +3453,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
unsigned long timeout, flags;
unsigned long timeout;
u64 completion_value;
long completion_rc;
int rc = 0;
@ -3403,9 +3473,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
/* Add pending user interrupt to relevant list for the interrupt
* handler to monitor
*/
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
spin_lock(&interrupt->wait_list_lock);
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
spin_unlock(&interrupt->wait_list_lock);
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
@ -3436,14 +3506,14 @@ wait_again:
* If comparison fails, keep waiting until timeout expires
*/
if (completion_rc > 0) {
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
spin_lock(&interrupt->wait_list_lock);
/* reinit_completion must be called before we check for user
* completion value, otherwise, if interrupt is received after
* the comparison and before the next wait_for_completion,
* we will reach timeout and fail
*/
reinit_completion(&pend->fence.completion);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
spin_unlock(&interrupt->wait_list_lock);
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
@ -3480,9 +3550,9 @@ wait_again:
}
remove_pending_user_interrupt:
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
spin_lock(&interrupt->wait_list_lock);
list_del(&pend->wait_list_node);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
spin_unlock(&interrupt->wait_list_lock);
*timestamp = ktime_to_ns(pend->fence.timestamp);

View File

@ -258,7 +258,7 @@ static int vm_show(struct seq_file *s, void *data)
if (!dev_entry->hdev->mmu_enable)
return 0;
spin_lock(&dev_entry->ctx_mem_hash_spinlock);
mutex_lock(&dev_entry->ctx_mem_hash_mutex);
list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) {
once = false;
@ -329,7 +329,7 @@ static int vm_show(struct seq_file *s, void *data)
}
spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
mutex_unlock(&dev_entry->ctx_mem_hash_mutex);
ctx = hl_get_compute_ctx(dev_entry->hdev);
if (ctx) {
@ -1583,59 +1583,183 @@ static const struct file_operations hl_debugfs_fops = {
.release = single_release,
};
static void add_secured_nodes(struct hl_dbg_device_entry *dev_entry)
static void add_secured_nodes(struct hl_dbg_device_entry *dev_entry, struct dentry *root)
{
debugfs_create_u8("i2c_bus",
0644,
dev_entry->root,
root,
&dev_entry->i2c_bus);
debugfs_create_u8("i2c_addr",
0644,
dev_entry->root,
root,
&dev_entry->i2c_addr);
debugfs_create_u8("i2c_reg",
0644,
dev_entry->root,
root,
&dev_entry->i2c_reg);
debugfs_create_u8("i2c_len",
0644,
dev_entry->root,
root,
&dev_entry->i2c_len);
debugfs_create_file("i2c_data",
0644,
dev_entry->root,
root,
dev_entry,
&hl_i2c_data_fops);
debugfs_create_file("led0",
0200,
dev_entry->root,
root,
dev_entry,
&hl_led0_fops);
debugfs_create_file("led1",
0200,
dev_entry->root,
root,
dev_entry,
&hl_led1_fops);
debugfs_create_file("led2",
0200,
dev_entry->root,
root,
dev_entry,
&hl_led2_fops);
}
static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_entry *dev_entry,
struct dentry *root)
{
int count = ARRAY_SIZE(hl_debugfs_list);
struct hl_debugfs_entry *entry;
int i;
debugfs_create_x64("memory_scrub_val",
0644,
root,
&hdev->memory_scrub_val);
debugfs_create_file("memory_scrub",
0200,
root,
dev_entry,
&hl_mem_scrub_fops);
debugfs_create_x64("addr",
0644,
root,
&dev_entry->addr);
debugfs_create_file("data32",
0644,
root,
dev_entry,
&hl_data32b_fops);
debugfs_create_file("data64",
0644,
root,
dev_entry,
&hl_data64b_fops);
debugfs_create_file("set_power_state",
0200,
root,
dev_entry,
&hl_power_fops);
debugfs_create_file("device",
0200,
root,
dev_entry,
&hl_device_fops);
debugfs_create_file("clk_gate",
0200,
root,
dev_entry,
&hl_clk_gate_fops);
debugfs_create_file("stop_on_err",
0644,
root,
dev_entry,
&hl_stop_on_err_fops);
debugfs_create_file("dump_security_violations",
0644,
root,
dev_entry,
&hl_security_violations_fops);
debugfs_create_file("dump_razwi_events",
0644,
root,
dev_entry,
&hl_razwi_check_fops);
debugfs_create_file("dma_size",
0200,
root,
dev_entry,
&hl_dma_size_fops);
debugfs_create_blob("data_dma",
0400,
root,
&dev_entry->data_dma_blob_desc);
debugfs_create_file("monitor_dump_trig",
0200,
root,
dev_entry,
&hl_monitor_dump_fops);
debugfs_create_blob("monitor_dump",
0400,
root,
&dev_entry->mon_dump_blob_desc);
debugfs_create_x8("skip_reset_on_timeout",
0644,
root,
&hdev->reset_info.skip_reset_on_timeout);
debugfs_create_file("state_dump",
0600,
root,
dev_entry,
&hl_state_dump_fops);
debugfs_create_file("timeout_locked",
0644,
root,
dev_entry,
&hl_timeout_locked_fops);
debugfs_create_u32("device_release_watchdog_timeout",
0644,
root,
&hdev->device_release_watchdog_timeout_sec);
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
0444,
root,
entry,
&hl_debugfs_fops);
entry->info_ent = &hl_debugfs_list[i];
entry->dev_entry = dev_entry;
}
}
void hl_debugfs_add_device(struct hl_device *hdev)
{
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
int count = ARRAY_SIZE(hl_debugfs_list);
struct hl_debugfs_entry *entry;
int i;
dev_entry->hdev = hdev;
dev_entry->entry_arr = kmalloc_array(count,
@ -1661,131 +1785,14 @@ void hl_debugfs_add_device(struct hl_device *hdev)
spin_lock_init(&dev_entry->cs_spinlock);
spin_lock_init(&dev_entry->cs_job_spinlock);
spin_lock_init(&dev_entry->userptr_spinlock);
spin_lock_init(&dev_entry->ctx_mem_hash_spinlock);
mutex_init(&dev_entry->ctx_mem_hash_mutex);
dev_entry->root = debugfs_create_dir(dev_name(hdev->dev),
hl_debug_root);
debugfs_create_x64("memory_scrub_val",
0644,
dev_entry->root,
&hdev->memory_scrub_val);
debugfs_create_file("memory_scrub",
0200,
dev_entry->root,
dev_entry,
&hl_mem_scrub_fops);
debugfs_create_x64("addr",
0644,
dev_entry->root,
&dev_entry->addr);
debugfs_create_file("data32",
0644,
dev_entry->root,
dev_entry,
&hl_data32b_fops);
debugfs_create_file("data64",
0644,
dev_entry->root,
dev_entry,
&hl_data64b_fops);
debugfs_create_file("set_power_state",
0200,
dev_entry->root,
dev_entry,
&hl_power_fops);
debugfs_create_file("device",
0200,
dev_entry->root,
dev_entry,
&hl_device_fops);
debugfs_create_file("clk_gate",
0200,
dev_entry->root,
dev_entry,
&hl_clk_gate_fops);
debugfs_create_file("stop_on_err",
0644,
dev_entry->root,
dev_entry,
&hl_stop_on_err_fops);
debugfs_create_file("dump_security_violations",
0644,
dev_entry->root,
dev_entry,
&hl_security_violations_fops);
debugfs_create_file("dump_razwi_events",
0644,
dev_entry->root,
dev_entry,
&hl_razwi_check_fops);
debugfs_create_file("dma_size",
0200,
dev_entry->root,
dev_entry,
&hl_dma_size_fops);
debugfs_create_blob("data_dma",
0400,
dev_entry->root,
&dev_entry->data_dma_blob_desc);
debugfs_create_file("monitor_dump_trig",
0200,
dev_entry->root,
dev_entry,
&hl_monitor_dump_fops);
debugfs_create_blob("monitor_dump",
0400,
dev_entry->root,
&dev_entry->mon_dump_blob_desc);
debugfs_create_x8("skip_reset_on_timeout",
0644,
dev_entry->root,
&hdev->reset_info.skip_reset_on_timeout);
debugfs_create_file("state_dump",
0600,
dev_entry->root,
dev_entry,
&hl_state_dump_fops);
debugfs_create_file("timeout_locked",
0644,
dev_entry->root,
dev_entry,
&hl_timeout_locked_fops);
debugfs_create_u32("device_release_watchdog_timeout",
0644,
dev_entry->root,
&hdev->device_release_watchdog_timeout_sec);
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
0444,
dev_entry->root,
entry,
&hl_debugfs_fops);
entry->info_ent = &hl_debugfs_list[i];
entry->dev_entry = dev_entry;
}
add_files_to_device(hdev, dev_entry, dev_entry->root);
if (!hdev->asic_prop.fw_security_enabled)
add_secured_nodes(dev_entry);
add_secured_nodes(dev_entry, dev_entry->root);
}
void hl_debugfs_remove_device(struct hl_device *hdev)
@ -1795,6 +1802,7 @@ void hl_debugfs_remove_device(struct hl_device *hdev)
debugfs_remove_recursive(entry->root);
mutex_destroy(&entry->ctx_mem_hash_mutex);
mutex_destroy(&entry->file_mutex);
vfree(entry->data_dma_blob_desc.data);
@ -1901,18 +1909,18 @@ void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
{
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
spin_lock(&dev_entry->ctx_mem_hash_spinlock);
mutex_lock(&dev_entry->ctx_mem_hash_mutex);
list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list);
spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
mutex_unlock(&dev_entry->ctx_mem_hash_mutex);
}
void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
{
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
spin_lock(&dev_entry->ctx_mem_hash_spinlock);
mutex_lock(&dev_entry->ctx_mem_hash_mutex);
list_del(&ctx->debugfs_list);
spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
mutex_unlock(&dev_entry->ctx_mem_hash_mutex);
}
/**

View File

@ -46,7 +46,7 @@ static void dec_print_abnrm_intr_source(struct hl_device *hdev, u32 irq_status)
static void dec_error_intr_work(struct hl_device *hdev, u32 base_addr, u32 core_id)
{
bool reset_required = false;
u32 irq_status;
u32 irq_status, event_mask;
irq_status = RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET);
@ -54,17 +54,27 @@ static void dec_error_intr_work(struct hl_device *hdev, u32 base_addr, u32 core_
dec_print_abnrm_intr_source(hdev, irq_status);
if (irq_status & VCMD_IRQ_STATUS_TIMEOUT_MASK)
reset_required = true;
/* Clear the interrupt */
WREG32(base_addr + VCMD_IRQ_STATUS_OFFSET, irq_status);
/* Flush the interrupt clear */
RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET);
if (reset_required)
hl_device_reset(hdev, HL_DRV_RESET_HARD);
if (irq_status & VCMD_IRQ_STATUS_TIMEOUT_MASK) {
reset_required = true;
event_mask = HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
} else if (irq_status & VCMD_IRQ_STATUS_CMDERR_MASK) {
event_mask = HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
} else {
event_mask = HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
}
if (reset_required) {
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
hl_device_cond_reset(hdev, 0, event_mask);
} else {
hl_notifier_event_send_all(hdev, event_mask);
}
}
static void dec_completion_abnrm(struct work_struct *work)

View File

@ -22,7 +22,6 @@
enum dma_alloc_type {
DMA_ALLOC_COHERENT,
DMA_ALLOC_CPU_ACCESSIBLE,
DMA_ALLOC_POOL,
};
@ -121,9 +120,6 @@ static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t
case DMA_ALLOC_COHERENT:
ptr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, size, dma_handle, flag);
break;
case DMA_ALLOC_CPU_ACCESSIBLE:
ptr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
break;
case DMA_ALLOC_POOL:
ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, size, flag, dma_handle);
break;
@ -147,9 +143,6 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c
case DMA_ALLOC_COHERENT:
hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle);
break;
case DMA_ALLOC_CPU_ACCESSIBLE:
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, cpu_addr);
break;
case DMA_ALLOC_POOL:
hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle);
break;
@ -170,18 +163,6 @@ void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void
hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller);
}
void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle, const char *caller)
{
return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
}
void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr,
const char *caller)
{
hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
}
void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
dma_addr_t *dma_handle, const char *caller)
{
@ -194,6 +175,16 @@ void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_
hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller);
}
void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle)
{
return hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}
void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
{
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@ -389,18 +380,17 @@ bool hl_ctrl_device_operational(struct hl_device *hdev,
static void print_idle_status_mask(struct hl_device *hdev, const char *message,
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE])
{
u32 pad_width[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {};
BUILD_BUG_ON(HL_BUSY_ENGINES_MASK_EXT_SIZE != 4);
pad_width[3] = idle_mask[3] ? 16 : 0;
pad_width[2] = idle_mask[2] || pad_width[3] ? 16 : 0;
pad_width[1] = idle_mask[1] || pad_width[2] ? 16 : 0;
pad_width[0] = idle_mask[0] || pad_width[1] ? 16 : 0;
dev_err(hdev->dev, "%s (mask %0*llx_%0*llx_%0*llx_%0*llx)\n",
message, pad_width[3], idle_mask[3], pad_width[2], idle_mask[2],
pad_width[1], idle_mask[1], pad_width[0], idle_mask[0]);
if (idle_mask[3])
dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx_%016llx)\n",
message, idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]);
else if (idle_mask[2])
dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx)\n",
message, idle_mask[2], idle_mask[1], idle_mask[0]);
else if (idle_mask[1])
dev_err(hdev->dev, "%s (mask %#llx_%016llx)\n",
message, idle_mask[1], idle_mask[0]);
else
dev_err(hdev->dev, "%s (mask %#llx)\n", message, idle_mask[0]);
}
static void hpriv_release(struct kref *ref)
@ -423,6 +413,9 @@ static void hpriv_release(struct kref *ref)
mutex_destroy(&hpriv->ctx_lock);
mutex_destroy(&hpriv->restore_phase_mutex);
/* There should be no memory buffers at this point and handles IDR can be destroyed */
hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
/* Device should be reset if reset-upon-device-release is enabled, or if there is a pending
* reset that waits for device release.
*/
@ -492,6 +485,36 @@ int hl_hpriv_put(struct hl_fpriv *hpriv)
return kref_put(&hpriv->refcount, hpriv_release);
}
static void print_device_in_use_info(struct hl_device *hdev, const char *message)
{
u32 active_cs_num, dmabuf_export_cnt;
bool unknown_reason = true;
char buf[128];
size_t size;
int offset;
size = sizeof(buf);
offset = 0;
active_cs_num = hl_get_active_cs_num(hdev);
if (active_cs_num) {
unknown_reason = false;
offset += scnprintf(buf + offset, size - offset, " [%u active CS]", active_cs_num);
}
dmabuf_export_cnt = atomic_read(&hdev->dmabuf_export_cnt);
if (dmabuf_export_cnt) {
unknown_reason = false;
offset += scnprintf(buf + offset, size - offset, " [%u exported dma-buf]",
dmabuf_export_cnt);
}
if (unknown_reason)
scnprintf(buf + offset, size - offset, " [unknown reason]");
dev_notice(hdev->dev, "%s%s\n", message, buf);
}
/*
* hl_device_release - release function for habanalabs device
*
@ -514,17 +537,20 @@ static int hl_device_release(struct inode *inode, struct file *filp)
}
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
/* Memory buffers might be still in use at this point and thus the handles IDR destruction
* is postponed to hpriv_release().
*/
hl_mem_mgr_fini(&hpriv->mem_mgr);
hdev->compute_ctx_in_release = 1;
if (!hl_hpriv_put(hpriv)) {
dev_notice(hdev->dev, "User process closed FD but device still in use\n");
print_device_in_use_info(hdev, "User process closed FD but device still in use");
hl_device_reset(hdev, HL_DRV_RESET_HARD);
}
hdev->last_open_session_duration_jif =
jiffies - hdev->last_successful_open_jif;
hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif;
return 0;
}
@ -617,7 +643,7 @@ static void device_release_func(struct device *dev)
* device_init_cdev - Initialize cdev and device for habanalabs device
*
* @hdev: pointer to habanalabs device structure
* @hclass: pointer to the class object of the device
* @class: pointer to the class object of the device
* @minor: minor number of the specific device
* @fpos: file operations to install for this device
* @name: name of the device as it will appear in the filesystem
@ -626,7 +652,7 @@ static void device_release_func(struct device *dev)
*
* Initialize a cdev and a Linux device for habanalabs's device.
*/
static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
static int device_init_cdev(struct hl_device *hdev, struct class *class,
int minor, const struct file_operations *fops,
char *name, struct cdev *cdev,
struct device **dev)
@ -640,7 +666,7 @@ static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
device_initialize(*dev);
(*dev)->devt = MKDEV(hdev->major, minor);
(*dev)->class = hclass;
(*dev)->class = class;
(*dev)->release = device_release_func;
dev_set_drvdata(*dev, hdev);
dev_set_name(*dev, "%s", name);
@ -733,14 +759,14 @@ static void device_hard_reset_pending(struct work_struct *work)
static void device_release_watchdog_func(struct work_struct *work)
{
struct hl_device_reset_work *device_release_watchdog_work =
container_of(work, struct hl_device_reset_work, reset_work.work);
struct hl_device *hdev = device_release_watchdog_work->hdev;
struct hl_device_reset_work *watchdog_work =
container_of(work, struct hl_device_reset_work, reset_work.work);
struct hl_device *hdev = watchdog_work->hdev;
u32 flags;
dev_dbg(hdev->dev, "Device wasn't released in time. Initiate device reset.\n");
dev_dbg(hdev->dev, "Device wasn't released in time. Initiate hard-reset.\n");
flags = device_release_watchdog_work->flags | HL_DRV_RESET_FROM_WD_THR;
flags = watchdog_work->flags | HL_DRV_RESET_HARD | HL_DRV_RESET_FROM_WD_THR;
hl_device_reset(hdev, flags);
}
@ -805,7 +831,7 @@ static int device_early_init(struct hl_device *hdev)
}
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
snprintf(workq_name, 32, "hl%u-free-jobs-%u", hdev->cdev_idx, (u32) i);
hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
if (hdev->cq_wq[i] == NULL) {
dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
@ -814,14 +840,16 @@ static int device_early_init(struct hl_device *hdev)
}
}
hdev->eq_wq = create_singlethread_workqueue("hl-events");
snprintf(workq_name, 32, "hl%u-events", hdev->cdev_idx);
hdev->eq_wq = create_singlethread_workqueue(workq_name);
if (hdev->eq_wq == NULL) {
dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
rc = -ENOMEM;
goto free_cq_wq;
}
hdev->cs_cmplt_wq = alloc_workqueue("hl-cs-completions", WQ_UNBOUND, 0);
snprintf(workq_name, 32, "hl%u-cs-completions", hdev->cdev_idx);
hdev->cs_cmplt_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0);
if (!hdev->cs_cmplt_wq) {
dev_err(hdev->dev,
"Failed to allocate CS completions workqueue\n");
@ -829,7 +857,8 @@ static int device_early_init(struct hl_device *hdev)
goto free_eq_wq;
}
hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
snprintf(workq_name, 32, "hl%u-ts-free-obj", hdev->cdev_idx);
hdev->ts_free_obj_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0);
if (!hdev->ts_free_obj_wq) {
dev_err(hdev->dev,
"Failed to allocate Timestamp registration free workqueue\n");
@ -837,15 +866,15 @@ static int device_early_init(struct hl_device *hdev)
goto free_cs_cmplt_wq;
}
hdev->prefetch_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0);
snprintf(workq_name, 32, "hl%u-prefetch", hdev->cdev_idx);
hdev->prefetch_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0);
if (!hdev->prefetch_wq) {
dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n");
rc = -ENOMEM;
goto free_ts_free_wq;
}
hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
GFP_KERNEL);
hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), GFP_KERNEL);
if (!hdev->hl_chip_info) {
rc = -ENOMEM;
goto free_prefetch_wq;
@ -857,7 +886,8 @@ static int device_early_init(struct hl_device *hdev)
hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr);
hdev->reset_wq = create_singlethread_workqueue("hl_device_reset");
snprintf(workq_name, 32, "hl%u_device_reset", hdev->cdev_idx);
hdev->reset_wq = create_singlethread_workqueue(workq_name);
if (!hdev->reset_wq) {
rc = -ENOMEM;
dev_err(hdev->dev, "Failed to create device reset WQ\n");
@ -887,6 +917,7 @@ static int device_early_init(struct hl_device *hdev)
free_cb_mgr:
hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr);
free_chip_info:
kfree(hdev->hl_chip_info);
free_prefetch_wq:
@ -930,6 +961,7 @@ static void device_early_fini(struct hl_device *hdev)
mutex_destroy(&hdev->clk_throttling.lock);
hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr);
kfree(hdev->hl_chip_info);
@ -953,6 +985,8 @@ static void hl_device_heartbeat(struct work_struct *work)
{
struct hl_device *hdev = container_of(work, struct hl_device,
work_heartbeat.work);
struct hl_info_fw_err_info info = {0};
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
if (!hl_device_operational(hdev, NULL))
goto reschedule;
@ -963,7 +997,10 @@ static void hl_device_heartbeat(struct work_struct *work)
if (hl_device_operational(hdev, NULL))
dev_err(hdev->dev, "Device heartbeat failed!\n");
hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT);
info.err_type = HL_INFO_FW_HEARTBEAT_ERR;
info.event_mask = &event_mask;
hl_handle_fw_err(hdev, &info);
hl_device_cond_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT, event_mask);
return;
@ -1402,7 +1439,7 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
*/
if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0))
dev_warn(hdev->dev,
"Failed to disable PCI access by F/W\n");
"Failed to disable FW's PCI access\n");
}
}
@ -1424,12 +1461,11 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
*/
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
reset_upon_device_release = false, schedule_hard_reset = false,
delay_reset, from_dev_release, from_watchdog_thread;
bool hard_reset, from_hard_reset_thread, fw_reset, reset_upon_device_release,
schedule_hard_reset = false, delay_reset, from_dev_release, from_watchdog_thread;
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
struct hl_ctx *ctx;
int i, rc;
int i, rc, hw_fini_rc;
if (!hdev->init_done) {
dev_err(hdev->dev, "Can't reset before initialization is done\n");
@ -1442,6 +1478,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
from_dev_release = !!(flags & HL_DRV_RESET_DEV_RELEASE);
delay_reset = !!(flags & HL_DRV_RESET_DELAY);
from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR);
reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release;
if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) {
dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n");
@ -1449,30 +1486,26 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
}
if (!hard_reset && !hdev->asic_prop.supports_compute_reset) {
hard_instead_soft = true;
dev_dbg(hdev->dev, "asic doesn't support compute reset - do hard-reset instead\n");
hard_reset = true;
}
if (hdev->reset_upon_device_release && from_dev_release) {
if (reset_upon_device_release) {
if (hard_reset) {
dev_crit(hdev->dev,
"Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n");
return -EINVAL;
}
reset_upon_device_release = true;
goto do_reset;
}
if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) {
hard_instead_soft = true;
dev_dbg(hdev->dev,
"asic doesn't allow inference soft reset - do hard-reset instead\n");
hard_reset = true;
}
if (hard_instead_soft)
dev_dbg(hdev->dev, "Doing hard-reset instead of compute reset\n");
do_reset:
/* Re-entry of reset thread */
if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
@ -1480,14 +1513,14 @@ do_reset:
/*
* Prevent concurrency in this function - only one reset should be
* done at any given time. Only need to perform this if we didn't
* get from the dedicated hard reset thread
* done at any given time. We need to perform this only if we didn't
* get here from a dedicated hard reset thread.
*/
if (!from_hard_reset_thread) {
/* Block future CS/VM/JOB completion operations */
spin_lock(&hdev->reset_info.lock);
if (hdev->reset_info.in_reset) {
/* We only allow scheduling of a hard reset during compute reset */
/* We allow scheduling of a hard reset only during a compute reset */
if (hard_reset && hdev->reset_info.in_compute_reset)
hdev->reset_info.hard_reset_schedule_flags = flags;
spin_unlock(&hdev->reset_info.lock);
@ -1505,15 +1538,17 @@ do_reset:
/* Cancel the device release watchdog work if required.
* In case of reset-upon-device-release while the release watchdog work is
* scheduled, do hard-reset instead of compute-reset.
* scheduled due to a hard-reset, do hard-reset instead of compute-reset.
*/
if ((hard_reset || from_dev_release) && hdev->reset_info.watchdog_active) {
struct hl_device_reset_work *watchdog_work =
&hdev->device_release_watchdog_work;
hdev->reset_info.watchdog_active = 0;
if (!from_watchdog_thread)
cancel_delayed_work_sync(
&hdev->device_release_watchdog_work.reset_work);
cancel_delayed_work_sync(&watchdog_work->reset_work);
if (from_dev_release) {
if (from_dev_release && (watchdog_work->flags & HL_DRV_RESET_HARD)) {
hdev->reset_info.in_compute_reset = 0;
flags |= HL_DRV_RESET_HARD;
flags &= ~HL_DRV_RESET_DEV_RELEASE;
@ -1524,6 +1559,7 @@ do_reset:
if (delay_reset)
usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);
escalate_reset_flow:
handle_reset_trigger(hdev, flags);
/* This also blocks future CS/VM/JOB completion operations */
@ -1539,7 +1575,6 @@ do_reset:
dev_dbg(hdev->dev, "Going to reset engines of inference device\n");
}
again:
if ((hard_reset) && (!from_hard_reset_thread)) {
hdev->reset_info.hard_reset_pending = true;
@ -1592,7 +1627,7 @@ kill_processes:
}
/* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
hw_fini_rc = hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
if (hard_reset) {
hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
@ -1619,6 +1654,10 @@ kill_processes:
hl_ctx_put(ctx);
}
if (hw_fini_rc) {
rc = hw_fini_rc;
goto out_err;
}
/* Finished tear-down, starting to re-initialize */
if (hard_reset) {
@ -1787,7 +1826,7 @@ kill_processes:
hdev->disabled = true;
hard_reset = true;
handle_reset_trigger(hdev, flags);
goto again;
goto escalate_reset_flow;
}
}
@ -1804,20 +1843,19 @@ out_err:
"%s Failed to reset! Device is NOT usable\n",
dev_name(&(hdev)->pdev->dev));
hdev->reset_info.hard_reset_cnt++;
} else if (reset_upon_device_release) {
spin_unlock(&hdev->reset_info.lock);
dev_err(hdev->dev, "Failed to reset device after user release\n");
flags |= HL_DRV_RESET_HARD;
flags &= ~HL_DRV_RESET_DEV_RELEASE;
hard_reset = true;
goto again;
} else {
if (reset_upon_device_release) {
dev_err(hdev->dev, "Failed to reset device after user release\n");
flags &= ~HL_DRV_RESET_DEV_RELEASE;
} else {
dev_err(hdev->dev, "Failed to do compute reset\n");
hdev->reset_info.compute_reset_cnt++;
}
spin_unlock(&hdev->reset_info.lock);
dev_err(hdev->dev, "Failed to do compute reset\n");
hdev->reset_info.compute_reset_cnt++;
flags |= HL_DRV_RESET_HARD;
hard_reset = true;
goto again;
goto escalate_reset_flow;
}
hdev->reset_info.in_reset = 0;
@ -1840,10 +1878,6 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
{
struct hl_ctx *ctx = NULL;
/* Device release watchdog is only for hard reset */
if (!(flags & HL_DRV_RESET_HARD) && hdev->asic_prop.allow_inference_soft_reset)
goto device_reset;
/* F/W reset cannot be postponed */
if (flags & HL_DRV_RESET_BYPASS_REQ_TO_FW)
goto device_reset;
@ -1871,7 +1905,7 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
goto out;
hdev->device_release_watchdog_work.flags = flags;
dev_dbg(hdev->dev, "Device is going to be reset in %u sec unless being released\n",
dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",
hdev->device_release_watchdog_timeout_sec);
schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work,
msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000));
@ -1939,6 +1973,51 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}
static int create_cdev(struct hl_device *hdev)
{
char *name;
int rc;
hdev->cdev_idx = hdev->id / 2;
name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
if (!name) {
rc = -ENOMEM;
goto out_err;
}
/* Initialize cdev and device structures */
rc = device_init_cdev(hdev, hdev->hclass, hdev->id, &hl_ops, name,
&hdev->cdev, &hdev->dev);
kfree(name);
if (rc)
goto out_err;
name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
if (!name) {
rc = -ENOMEM;
goto free_dev;
}
/* Initialize cdev and device structures for control device */
rc = device_init_cdev(hdev, hdev->hclass, hdev->id_control, &hl_ctrl_ops,
name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
kfree(name);
if (rc)
goto free_dev;
return 0;
free_dev:
put_device(hdev->dev);
out_err:
return rc;
}
/*
* hl_device_init - main initialization function for habanalabs device
*
@ -1948,48 +2027,19 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
* ASIC specific initialization functions. Finally, create the cdev and the
* Linux device to expose it to the user
*/
int hl_device_init(struct hl_device *hdev, struct class *hclass)
int hl_device_init(struct hl_device *hdev)
{
int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
char *name;
bool add_cdev_sysfs_on_err = false;
hdev->cdev_idx = hdev->id / 2;
name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
if (!name) {
rc = -ENOMEM;
goto out_disabled;
}
/* Initialize cdev and device structures */
rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
&hdev->cdev, &hdev->dev);
kfree(name);
rc = create_cdev(hdev);
if (rc)
goto out_disabled;
name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
if (!name) {
rc = -ENOMEM;
goto free_dev;
}
/* Initialize cdev and device structures for control device */
rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
kfree(name);
if (rc)
goto free_dev;
/* Initialize ASIC function pointers and perform early init */
rc = device_early_init(hdev);
if (rc)
goto free_dev_ctrl;
goto free_dev;
user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
hdev->asic_prop.user_interrupt_count;
@ -2241,9 +2291,8 @@ free_usr_intr_mem:
kfree(hdev->user_interrupt);
early_fini:
device_early_fini(hdev);
free_dev_ctrl:
put_device(hdev->dev_ctrl);
free_dev:
put_device(hdev->dev_ctrl);
put_device(hdev->dev);
out_disabled:
hdev->disabled = true;
@ -2364,7 +2413,9 @@ void hl_device_fini(struct hl_device *hdev)
hl_cb_pool_fini(hdev);
/* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, true, false);
rc = hdev->asic_funcs->hw_fini(hdev, true, false);
if (rc)
dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc);
hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
@ -2566,3 +2617,49 @@ void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_
if (event_mask)
*event_mask |= HL_NOTIFIER_EVENT_PAGE_FAULT;
}
static void hl_capture_hw_err(struct hl_device *hdev, u16 event_id)
{
struct hw_err_info *info = &hdev->captured_err_info.hw_err;
/* Capture only the first HW err */
if (atomic_cmpxchg(&info->event_detected, 0, 1))
return;
info->event.timestamp = ktime_to_ns(ktime_get());
info->event.event_id = event_id;
info->event_info_available = true;
}
void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask)
{
hl_capture_hw_err(hdev, event_id);
if (event_mask)
*event_mask |= HL_NOTIFIER_EVENT_CRITICL_HW_ERR;
}
static void hl_capture_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *fw_info)
{
struct fw_err_info *info = &hdev->captured_err_info.fw_err;
/* Capture only the first FW error */
if (atomic_cmpxchg(&info->event_detected, 0, 1))
return;
info->event.timestamp = ktime_to_ns(ktime_get());
info->event.err_type = fw_info->err_type;
if (fw_info->err_type == HL_INFO_FW_REPORTED_ERR)
info->event.event_id = fw_info->event_id;
info->event_info_available = true;
}
void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info)
{
hl_capture_fw_err(hdev, info);
if (info->event_mask)
*info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
}

View File

@ -3152,7 +3152,7 @@ int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_in
int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode,
dma_addr_t buff, u32 *size)
{
struct cpucp_packet pkt = {0};
struct cpucp_packet pkt = {};
u64 result;
int rc = 0;

View File

@ -155,18 +155,12 @@ enum hl_mmu_enablement {
#define hl_asic_dma_alloc_coherent(hdev, size, dma_handle, flags) \
hl_asic_dma_alloc_coherent_caller(hdev, size, dma_handle, flags, __func__)
#define hl_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle) \
hl_cpu_accessible_dma_pool_alloc_caller(hdev, size, dma_handle, __func__)
#define hl_asic_dma_pool_zalloc(hdev, size, mem_flags, dma_handle) \
hl_asic_dma_pool_zalloc_caller(hdev, size, mem_flags, dma_handle, __func__)
#define hl_asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle) \
hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_handle, __func__)
#define hl_cpu_accessible_dma_pool_free(hdev, size, vaddr) \
hl_cpu_accessible_dma_pool_free_caller(hdev, size, vaddr, __func__)
#define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \
hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__)
@ -378,6 +372,7 @@ enum hl_cs_type {
CS_RESERVE_SIGNALS,
CS_UNRESERVE_SIGNALS,
CS_TYPE_ENGINE_CORE,
CS_TYPE_ENGINES,
CS_TYPE_FLUSH_PCI_HBW_WRITES,
};
@ -592,6 +587,8 @@ struct hl_hints_range {
* @host_base_address: host physical start address for host DMA from device
* @host_end_address: host physical end address for host DMA from device
* @max_freq_value: current max clk frequency.
* @engine_core_interrupt_reg_addr: interrupt register address for engine core to use
* in order to raise events toward FW.
* @clk_pll_index: clock PLL index that specify which PLL determines the clock
* we display to the user
* @mmu_pgt_size: MMU page tables total size.
@ -612,8 +609,8 @@ struct hl_hints_range {
* @cb_pool_cb_cnt: number of CBs in the CB pool.
* @cb_pool_cb_size: size of each CB in the CB pool.
* @decoder_enabled_mask: which decoders are enabled.
* @decoder_binning_mask: which decoders are binned, 0 means usable and 1
* means binned (at most one binned decoder per dcore).
* @decoder_binning_mask: which decoders are binned, 0 means usable and 1 means binned.
* @rotator_enabled_mask: which rotators are enabled.
* @edma_enabled_mask: which EDMAs are enabled.
* @edma_binning_mask: which EDMAs are binned, 0 means usable and 1 means
* binned (at most one binned DMA).
@ -648,7 +645,8 @@ struct hl_hints_range {
* which the property supports_user_set_page_size is true
* (i.e. the DRAM supports multiple page sizes), otherwise
* it will shall be equal to dram_page_size.
* @num_engine_cores: number of engine cpu cores
* @num_engine_cores: number of engine cpu cores.
* @max_num_of_engines: maximum number of all engines in the ASIC.
* @num_of_special_blocks: special_blocks array size.
* @glbl_err_cause_num: global err cause number.
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
@ -663,6 +661,8 @@ struct hl_hints_range {
* @first_available_cq: first available CQ for the user.
* @user_interrupt_count: number of user interrupts.
* @user_dec_intr_count: number of decoder interrupts exposed to user.
* @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host.
* @unexpected_user_error_interrupt_id: interrupt id used to indicate an unexpected user error.
* @cache_line_size: device cache line size.
* @server_type: Server type that the ASIC is currently installed in.
* The value is according to enum hl_server_type in uapi file.
@ -698,6 +698,7 @@ struct hl_hints_range {
* @supports_user_set_page_size: true if user can set the allocation page size.
* @dma_mask: the dma mask to be set for this device
* @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
* @supports_engine_modes: true if changing engines/engine_cores modes is supported.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@ -739,6 +740,7 @@ struct asic_fixed_properties {
u64 host_base_address;
u64 host_end_address;
u64 max_freq_value;
u64 engine_core_interrupt_reg_addr;
u32 clk_pll_index;
u32 mmu_pgt_size;
u32 mmu_pte_size;
@ -759,6 +761,7 @@ struct asic_fixed_properties {
u32 cb_pool_cb_size;
u32 decoder_enabled_mask;
u32 decoder_binning_mask;
u32 rotator_enabled_mask;
u32 edma_enabled_mask;
u32 edma_binning_mask;
u32 max_pending_cs;
@ -775,6 +778,7 @@ struct asic_fixed_properties {
u32 xbar_edge_enabled_mask;
u32 device_mem_alloc_default_page_size;
u32 num_engine_cores;
u32 max_num_of_engines;
u32 num_of_special_blocks;
u32 glbl_err_cause_num;
u32 hbw_flush_reg;
@ -788,6 +792,8 @@ struct asic_fixed_properties {
u16 first_available_cq[HL_MAX_DCORES];
u16 user_interrupt_count;
u16 user_dec_intr_count;
u16 tpc_interrupt_id;
u16 unexpected_user_error_interrupt_id;
u16 cache_line_size;
u16 server_type;
u8 completion_queues_count;
@ -811,6 +817,7 @@ struct asic_fixed_properties {
u8 supports_user_set_page_size;
u8 dma_mask;
u8 supports_advanced_cpucp_rc;
u8 supports_engine_modes;
};
/**
@ -1096,6 +1103,8 @@ struct hl_cq {
enum hl_user_interrupt_type {
HL_USR_INTERRUPT_CQ = 0,
HL_USR_INTERRUPT_DECODER,
HL_USR_INTERRUPT_TPC,
HL_USR_INTERRUPT_UNEXPECTED
};
/**
@ -1104,6 +1113,7 @@ enum hl_user_interrupt_type {
* @type: user interrupt type
* @wait_list_head: head to the list of user threads pending on this interrupt
* @wait_list_lock: protects wait_list_head
* @timestamp: last timestamp taken upon interrupt
* @interrupt_id: msix interrupt id
*/
struct hl_user_interrupt {
@ -1111,6 +1121,7 @@ struct hl_user_interrupt {
enum hl_user_interrupt_type type;
struct list_head wait_list_head;
spinlock_t wait_list_lock;
ktime_t timestamp;
u32 interrupt_id;
};
@ -1562,6 +1573,7 @@ struct engines_data {
* @access_dev_mem: access device memory
* @set_dram_bar_base: set the base of the DRAM BAR
* @set_engine_cores: set a config command to engine cores
* @set_engines: set a config command to user engines
* @send_device_activity: indication to FW about device availability
* @set_dram_properties: set DRAM related properties.
* @set_binning_masks: set binning/enable masks for all relevant components.
@ -1574,7 +1586,7 @@ struct hl_asic_funcs {
int (*sw_init)(struct hl_device *hdev);
int (*sw_fini)(struct hl_device *hdev);
int (*hw_init)(struct hl_device *hdev);
void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
int (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
int (*suspend)(struct hl_device *hdev);
int (*resume)(struct hl_device *hdev);
@ -1701,6 +1713,8 @@ struct hl_asic_funcs {
u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids,
u32 num_cores, u32 core_command);
int (*set_engines)(struct hl_device *hdev, u32 *engine_ids,
u32 num_engines, u32 engine_command);
int (*send_device_activity)(struct hl_device *hdev, bool open);
int (*set_dram_properties)(struct hl_device *hdev);
int (*set_binning_masks)(struct hl_device *hdev);
@ -1824,7 +1838,7 @@ struct hl_cs_outcome_store {
* @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
* @hdev: pointer to the device structure.
* @refcount: reference counter for the context. Context is released only when
* this hits 0l. It is incremented on CS and CS_WAIT.
* this hits 0. It is incremented on CS and CS_WAIT.
* @cs_pending: array of hl fence objects representing pending CS.
* @outcome_store: storage data structure used to remember outcomes of completed
* command submissions for a long time after CS id wraparound.
@ -2318,7 +2332,7 @@ struct hl_debugfs_entry {
* @userptr_list: list of available userptrs (virtual memory chunk descriptor).
* @userptr_spinlock: protects userptr_list.
* @ctx_mem_hash_list: list of available contexts with MMU mappings.
* @ctx_mem_hash_spinlock: protects cb_list.
* @ctx_mem_hash_mutex: protects list of available contexts with MMU mappings.
* @data_dma_blob_desc: data DMA descriptor of blob.
* @mon_dump_blob_desc: monitor dump descriptor of blob.
* @state_dump: data of the system states in case of a bad cs.
@ -2349,7 +2363,7 @@ struct hl_dbg_device_entry {
struct list_head userptr_list;
spinlock_t userptr_spinlock;
struct list_head ctx_mem_hash_list;
spinlock_t ctx_mem_hash_spinlock;
struct mutex ctx_mem_hash_mutex;
struct debugfs_blob_wrapper data_dma_blob_desc;
struct debugfs_blob_wrapper mon_dump_blob_desc;
char *state_dump[HL_STATE_DUMP_HIST_LEN];
@ -2974,8 +2988,8 @@ struct cs_timeout_info {
* @cq_addr: the address of the current handled command buffer
* @cq_size: the size of the current handled command buffer
* @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array.
* should be equal to 1 incase of undefined opcode
* in Upper-CP (specific stream) and equal to 4 incase
* should be equal to 1 in case of undefined opcode
* in Upper-CP (specific stream) and equal to 4 in case
* of undefined opcode in Lower-CP.
* @engine_id: engine-id that the error occurred on
* @stream_id: the stream id the error occurred on. In case the stream equals to
@ -3031,18 +3045,56 @@ struct razwi_info {
bool razwi_info_available;
};
/**
* struct hw_err_info - HW error information.
* @event: holds information on the event.
* @event_detected: if set to 1, then a HW event was discovered for the
* first time after the driver has finished booting-up.
* Currently we assume that only fatal events (that require hard-reset) are
* reported, so we don't care about the others that might follow it.
* Hence, once set to 1, it will remain that way.
* TODO: support multiple events.
* @event_info_available: indicates that a HW event info is now available.
*/
struct hw_err_info {
struct hl_info_hw_err_event event;
atomic_t event_detected;
bool event_info_available;
};
/**
* struct fw_err_info - FW error information.
* @event: holds information on the event.
* @event_detected: if set to 1, then a FW event was discovered for the
* first time after the driver has finished booting-up.
* Currently we assume that only fatal events (that require hard-reset) are
* reported, so we don't care about the others that might follow it.
* Hence, once set to 1, it will remain that way.
* TODO: support multiple events.
* @event_info_available: indicates that a FW event info is now available.
*/
struct fw_err_info {
struct hl_info_fw_err_event event;
atomic_t event_detected;
bool event_info_available;
};
/**
* struct hl_error_info - holds information collected during an error.
* @cs_timeout: CS timeout error information.
* @razwi_info: RAZWI information.
* @undef_opcode: undefined opcode information.
* @page_fault_info: page fault information.
* @hw_err: (fatal) hardware error information.
* @fw_err: firmware error information.
*/
struct hl_error_info {
struct cs_timeout_info cs_timeout;
struct razwi_info razwi_info;
struct undefined_opcode_info undef_opcode;
struct page_fault_info page_fault_info;
struct hw_err_info hw_err;
struct fw_err_info fw_err;
};
/**
@ -3090,6 +3142,7 @@ struct hl_reset_info {
* (required only for PCI address match mode)
* @pcie_bar: array of available PCIe bars virtual addresses.
* @rmmio: configuration area address on SRAM.
* @hclass: pointer to the habanalabs class.
* @cdev: related char device.
* @cdev_ctrl: char device for control operations only (INFO IOCTL)
* @dev: related kernel basic device structure.
@ -3104,6 +3157,8 @@ struct hl_reset_info {
* @user_interrupt: array of hl_user_interrupt. upon the corresponding user
* interrupt, driver will monitor the list of fences
* registered to this interrupt.
* @tpc_interrupt: single TPC interrupt for all TPCs.
* @unexpected_error_interrupt: single interrupt for unexpected user error indication.
* @common_user_cq_interrupt: common user CQ interrupt for all user CQ interrupts.
* upon any user CQ interrupt, driver will monitor the
* list of fences registered to this common structure.
@ -3199,6 +3254,7 @@ struct hl_reset_info {
* drams are binned-out
* @tpc_binning: contains mask of tpc engines that is received from the f/w which indicates which
* tpc engines are binned-out
* @dmabuf_export_cnt: number of dma-buf exporting.
* @card_type: Various ASICs have several card types. This indicates the card
* type of the current device.
* @major: habanalabs kernel driver major.
@ -3253,6 +3309,8 @@ struct hl_reset_info {
* @supports_mmu_prefetch: true if prefetch is supported, otherwise false.
* @reset_upon_device_release: reset the device when the user closes the file descriptor of the
* device.
* @supports_ctx_switch: true if a ctx switch is required upon first submission.
* @support_preboot_binning: true if we support read binning info from preboot.
* @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing.
* @fw_components: Controls which f/w components to load to the device. There are multiple f/w
* stages and sometimes we want to stop at a certain stage. Used only for testing.
@ -3266,14 +3324,13 @@ struct hl_reset_info {
* Used only for testing.
* @heartbeat: Controls if we want to enable the heartbeat mechanism vs. the f/w, which verifies
* that the f/w is always alive. Used only for testing.
* @supports_ctx_switch: true if a ctx switch is required upon first submission.
* @support_preboot_binning: true if we support read binning info from preboot.
*/
struct hl_device {
struct pci_dev *pdev;
u64 pcie_bar_phys[HL_PCI_NUM_BARS];
void __iomem *pcie_bar[HL_PCI_NUM_BARS];
void __iomem *rmmio;
struct class *hclass;
struct cdev cdev;
struct cdev cdev_ctrl;
struct device *dev;
@ -3286,6 +3343,8 @@ struct hl_device {
enum hl_asic_type asic_type;
struct hl_cq *completion_queue;
struct hl_user_interrupt *user_interrupt;
struct hl_user_interrupt tpc_interrupt;
struct hl_user_interrupt unexpected_error_interrupt;
struct hl_user_interrupt common_user_cq_interrupt;
struct hl_user_interrupt common_decoder_interrupt;
struct hl_cs **shadow_cs_queue;
@ -3369,7 +3428,7 @@ struct hl_device {
u64 fw_comms_poll_interval_usec;
u64 dram_binning;
u64 tpc_binning;
atomic_t dmabuf_export_cnt;
enum cpucp_card_types card_type;
u32 major;
u32 high_pll;
@ -3412,7 +3471,7 @@ struct hl_device {
u8 supports_ctx_switch;
u8 support_preboot_binning;
/* Parameters for bring-up */
/* Parameters for bring-up to be upstreamed */
u64 nic_ports_mask;
u64 fw_components;
u8 mmu_enable;
@ -3450,6 +3509,20 @@ struct hl_cs_encaps_sig_handle {
u32 count;
};
/**
* struct hl_info_fw_err_info - firmware error information structure
* @err_type: The type of error detected (or reported).
* @event_mask: Pointer to the event mask to be modified with the detected error flag
* (can be NULL)
* @event_id: The id of the event that reported the error
* (applicable when err_type is HL_INFO_FW_REPORTED_ERR).
*/
struct hl_info_fw_err_info {
enum hl_info_fw_err_type err_type;
u64 *event_mask;
u16 event_id;
};
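For reference, a minimal sketch (with a hypothetical helper name; the real call site is the Gaudi event-queue handler further down in this series) of how an ASIC event handler fills this descriptor and hands it to hl_handle_fw_err() when the f/w reports an error:

static void report_fw_reported_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	struct hl_info_fw_err_info info = {0};

	/* classify the error as one reported by the f/w and record the async event id */
	info.err_type = HL_INFO_FW_REPORTED_ERR;
	info.event_id = event_type;
	/* hl_handle_fw_err() will set the matching notifier bit in *event_mask */
	info.event_mask = event_mask;

	hl_handle_fw_err(hdev, &info);
}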
/*
* IOCTLs
*/
@ -3474,6 +3547,10 @@ struct hl_ioctl_desc {
hl_ioctl_t *func;
};
static inline bool hl_is_fw_ver_below_1_9(struct hl_device *hdev)
{
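/* f/w major version 42 corresponds to f/w release 1.9, so anything lower is below 1.9 */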
return (hdev->fw_major_version < 42);
}
/*
* Kernel module functions that can be accessed by entire module
@ -3537,14 +3614,12 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
}
uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr);
void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle);
void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr);
void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
gfp_t flag, const char *caller);
void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr,
dma_addr_t dma_handle, const char *caller);
void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle, const char *caller);
void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr,
const char *caller);
void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
dma_addr_t *dma_handle, const char *caller);
void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
@ -3591,7 +3666,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg);
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg);
irqreturn_t hl_irq_handler_default(int irq, void *arg);
irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg);
u32 hl_cq_inc_ptr(u32 ptr);
int hl_asid_init(struct hl_device *hdev);
@ -3612,7 +3687,7 @@ int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
int hl_device_init(struct hl_device *hdev, struct class *hclass);
int hl_device_init(struct hl_device *hdev);
void hl_device_fini(struct hl_device *hdev);
int hl_device_suspend(struct hl_device *hdev);
int hl_device_resume(struct hl_device *hdev);
@ -3662,6 +3737,7 @@ bool cs_needs_timeout(struct hl_cs *cs);
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);
void hl_multi_cs_completion_init(struct hl_device *hdev);
u32 hl_get_active_cs_num(struct hl_device *hdev);
void goya_set_asic_funcs(struct hl_device *hdev);
void gaudi_set_asic_funcs(struct hl_device *hdev);
@ -3861,6 +3937,7 @@ const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg);
void hl_mem_mgr_fini(struct hl_mem_mgr *mmg);
void hl_mem_mgr_idr_destroy(struct hl_mem_mgr *mmg);
int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
void *args);
struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg,
@ -3879,6 +3956,8 @@ void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_o
void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu);
void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu,
u64 *event_mask);
void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask);
void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
#ifdef CONFIG_DEBUG_FS


@ -12,7 +12,6 @@
#include "../include/hw_ip/pci/pci_general.h"
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/module.h>
#define CREATE_TRACE_POINTS
@ -221,12 +220,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_debugfs_add_file(hpriv);
memset(&hdev->captured_err_info, 0, sizeof(hdev->captured_err_info));
atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
atomic_set(&hdev->captured_err_info.razwi_info.razwi_detected, 0);
atomic_set(&hdev->captured_err_info.page_fault_info.page_fault_detected, 0);
hdev->captured_err_info.undef_opcode.write_enable = true;
hdev->captured_err_info.razwi_info.razwi_info_available = false;
hdev->captured_err_info.page_fault_info.page_fault_info_available = false;
hdev->open_counter++;
hdev->last_successful_open_jif = jiffies;
@ -237,6 +233,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
out_err:
mutex_unlock(&hdev->fpriv_list_lock);
hl_mem_mgr_fini(&hpriv->mem_mgr);
hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
mutex_destroy(&hpriv->ctx_lock);
@ -324,6 +321,7 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
hdev->major = hl_major;
hdev->hclass = hl_class;
hdev->memory_scrub = memory_scrub;
hdev->reset_on_lockup = reset_on_lockup;
hdev->boot_error_status_mask = boot_error_status_mask;
@ -550,9 +548,7 @@ static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_drvdata(pdev, hdev);
pci_enable_pcie_error_reporting(pdev);
rc = hl_device_init(hdev, hl_class);
rc = hl_device_init(hdev);
if (rc) {
dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
rc = -ENODEV;
@ -562,7 +558,6 @@ static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;
disable_device:
pci_disable_pcie_error_reporting(pdev);
pci_set_drvdata(pdev, NULL);
destroy_hdev(hdev);
@ -585,7 +580,6 @@ static void hl_pci_remove(struct pci_dev *pdev)
return;
hl_device_fini(hdev);
pci_disable_pcie_error_reporting(pdev);
pci_set_drvdata(pdev, NULL);
destroy_hdev(hdev);
}


@ -102,11 +102,15 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.mme_master_slave_mode = prop->mme_master_slave_mode;
hw_ip.first_available_interrupt_id = prop->first_available_user_interrupt;
hw_ip.number_of_user_interrupts = prop->user_interrupt_count;
hw_ip.tpc_interrupt_id = prop->tpc_interrupt_id;
hw_ip.edma_enabled_mask = prop->edma_enabled_mask;
hw_ip.server_type = prop->server_type;
hw_ip.security_enabled = prop->fw_security_enabled;
hw_ip.revision_id = hdev->pdev->revision;
hw_ip.rotator_enabled_mask = prop->rotator_enabled_mask;
hw_ip.engine_core_interrupt_reg_addr = prop->engine_core_interrupt_reg_addr;
hw_ip.reserved_dram_size = dram_kmd_size;
return copy_to_user(out, &hw_ip,
min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
@ -830,6 +834,50 @@ static int user_mappings_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
return copy_to_user(out, pgf_info->user_mappings, actual_size) ? -EFAULT : 0;
}
static int hw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer;
struct hl_device *hdev = hpriv->hdev;
u32 user_buf_size = args->return_size;
struct hw_err_info *info;
int rc;
if ((!user_buf_size) || (!user_buf))
return -EINVAL;
if (user_buf_size < sizeof(struct hl_info_hw_err_event))
return -ENOMEM;
info = &hdev->captured_err_info.hw_err;
if (!info->event_info_available)
return -ENOENT;
rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_hw_err_event));
return rc ? -EFAULT : 0;
}
static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer;
struct hl_device *hdev = hpriv->hdev;
u32 user_buf_size = args->return_size;
struct fw_err_info *info;
int rc;
if ((!user_buf_size) || (!user_buf))
return -EINVAL;
if (user_buf_size < sizeof(struct hl_info_fw_err_event))
return -ENOMEM;
info = &hdev->captured_err_info.fw_err;
if (!info->event_info_available)
return -ENOENT;
rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_fw_err_event));
return rc ? -EFAULT : 0;
}
static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args)
{
void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer;
@ -950,6 +998,14 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_UNREGISTER_EVENTFD:
return eventfd_unregister(hpriv, args);
case HL_INFO_HW_ERR_EVENT:
return hw_err_info(hpriv, args);
case HL_INFO_FW_ERR_EVENT:
return fw_err_info(hpriv, args);
case HL_INFO_DRAM_USAGE:
return dram_usage_info(hpriv, args);
default:
break;
}
@ -962,10 +1018,6 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
}
switch (args->op) {
case HL_INFO_DRAM_USAGE:
rc = dram_usage_info(hpriv, args);
break;
case HL_INFO_HW_IDLE:
rc = hw_idle(hdev, args);
break;


@ -280,7 +280,6 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
struct list_head *ts_reg_free_list_head = NULL;
struct timestamp_reg_work_obj *job;
bool reg_node_handle_fail = false;
ktime_t now = ktime_get();
int rc;
/* For registration nodes:
@ -303,13 +302,13 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
if (pend->ts_reg_info.buf) {
if (!reg_node_handle_fail) {
rc = handle_registration_node(hdev, pend,
&ts_reg_free_list_head, now);
&ts_reg_free_list_head, intr->timestamp);
if (rc)
reg_node_handle_fail = true;
}
} else {
/* Handle wait target value node */
pend->fence.timestamp = now;
pend->fence.timestamp = intr->timestamp;
complete_all(&pend->fence.completion);
}
}
@ -326,6 +325,26 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
}
}
static void handle_tpc_interrupt(struct hl_device *hdev)
{
u64 event_mask;
u32 flags;
event_mask = HL_NOTIFIER_EVENT_TPC_ASSERT |
HL_NOTIFIER_EVENT_USER_ENGINE_ERR |
HL_NOTIFIER_EVENT_DEVICE_RESET;
flags = HL_DRV_RESET_DELAY;
dev_err_ratelimited(hdev->dev, "Received TPC assert\n");
hl_device_cond_reset(hdev, flags, event_mask);
}
static void handle_unexpected_user_interrupt(struct hl_device *hdev)
{
dev_err_ratelimited(hdev->dev, "Received unexpected user error interrupt\n");
}
/**
* hl_irq_handler_user_interrupt - irq handler for user interrupts
*
@ -334,6 +353,23 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
*
*/
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg)
{
struct hl_user_interrupt *user_int = arg;
user_int->timestamp = ktime_get();
return IRQ_WAKE_THREAD;
}
/**
* hl_irq_user_interrupt_thread_handler - irq thread handler for user interrupts.
* This function is invoked by threaded irq mechanism
*
* @irq: irq number
* @arg: pointer to user interrupt structure
*
*/
irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
{
struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev;
@ -351,6 +387,12 @@ irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg)
/* Handle decoder interrupt registered on this specific irq */
handle_user_interrupt(hdev, user_int);
break;
case HL_USR_INTERRUPT_TPC:
handle_tpc_interrupt(hdev);
break;
case HL_USR_INTERRUPT_UNEXPECTED:
handle_unexpected_user_interrupt(hdev);
break;
default:
break;
}
@ -358,24 +400,6 @@ irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg)
return IRQ_HANDLED;
}
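The hard handler above only stamps the interrupt time and defers the actual wait-list processing to the thread handler; a minimal sketch, assuming an illustrative irq number, name and wrapper function (the driver's real registration lives in the ASIC-specific init code), of how such a hard/thread pair is wired up with request_threaded_irq() from <linux/interrupt.h>:

static int register_user_interrupt(struct hl_user_interrupt *user_int, int irq)
{
	/* hard handler records the timestamp, thread handler walks the fence wait list */
	return request_threaded_irq(irq, hl_irq_handler_user_interrupt,
				    hl_irq_user_interrupt_thread_handler, 0,
				    "hl-user-interrupt", user_int);
}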
/**
* hl_irq_handler_default - default irq handler
*
* @irq: irq number
* @arg: pointer to user interrupt structure
*
*/
irqreturn_t hl_irq_handler_default(int irq, void *arg)
{
struct hl_user_interrupt *user_interrupt = arg;
struct hl_device *hdev = user_interrupt->hdev;
u32 interrupt_id = user_interrupt->interrupt_id;
dev_err(hdev->dev, "got invalid user interrupt %u", interrupt_id);
return IRQ_HANDLED;
}
/**
* hl_irq_handler_eq - irq handler for event queue
*
@ -405,11 +429,10 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
cur_eqe_index = FIELD_GET(EQ_CTL_INDEX_MASK, cur_eqe);
if ((hdev->event_queue.check_eqe_index) &&
(((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK)
!= cur_eqe_index)) {
(((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK) != cur_eqe_index)) {
dev_dbg(hdev->dev,
"EQE 0x%x in queue is ready but index does not match %d!=%d",
eq_base[eq->ci].hdr.ctl,
"EQE %#x in queue is ready but index does not match %d!=%d",
cur_eqe,
((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK),
cur_eqe_index);
break;

View File

@ -235,10 +235,8 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
}
rc = hl_pin_host_memory(hdev, addr, size, userptr);
if (rc) {
dev_err(hdev->dev, "Failed to pin host memory\n");
if (rc)
goto pin_err;
}
userptr->dma_mapped = true;
userptr->dir = DMA_BIDIRECTIONAL;
@ -1097,10 +1095,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device
huge_page_size = hdev->asic_prop.pmmu_huge.page_size;
rc = dma_map_host_va(hdev, addr, size, &userptr);
if (rc) {
dev_err(hdev->dev, "failed to get userptr from va\n");
if (rc)
return rc;
}
rc = init_phys_pg_pack_from_userptr(ctx, userptr,
&phys_pg_pack, false);
@ -1270,6 +1266,18 @@ init_page_pack_err:
return rc;
}
/* Should be called while the context's mem_hash_lock is taken */
static struct hl_vm_hash_node *get_vm_hash_node_locked(struct hl_ctx *ctx, u64 vaddr)
{
struct hl_vm_hash_node *hnode;
hash_for_each_possible(ctx->mem_hash, hnode, node, vaddr)
if (vaddr == hnode->vaddr)
return hnode;
return NULL;
}
/**
* unmap_device_va() - unmap the given device virtual address.
* @ctx: pointer to the context structure.
@ -1285,10 +1293,10 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
{
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
u64 vaddr = args->unmap.device_virt_addr;
struct hl_vm_hash_node *hnode = NULL;
struct asic_fixed_properties *prop;
struct hl_device *hdev = ctx->hdev;
struct hl_userptr *userptr = NULL;
struct hl_vm_hash_node *hnode;
struct hl_va_range *va_range;
enum vm_type *vm_type;
bool is_userptr;
@ -1298,15 +1306,10 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
/* protect from double entrance */
mutex_lock(&ctx->mem_hash_lock);
hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
if (vaddr == hnode->vaddr)
break;
hnode = get_vm_hash_node_locked(ctx, vaddr);
if (!hnode) {
mutex_unlock(&ctx->mem_hash_lock);
dev_err(hdev->dev,
"unmap failed, no mem hnode for vaddr 0x%llx\n",
vaddr);
dev_err(hdev->dev, "unmap failed, no mem hnode for vaddr 0x%llx\n", vaddr);
return -EINVAL;
}
@ -1779,6 +1782,44 @@ static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment,
kfree(sgt);
}
static struct hl_vm_hash_node *memhash_node_export_get(struct hl_ctx *ctx, u64 addr)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm_hash_node *hnode;
/* get the memory handle */
mutex_lock(&ctx->mem_hash_lock);
hnode = get_vm_hash_node_locked(ctx, addr);
if (!hnode) {
mutex_unlock(&ctx->mem_hash_lock);
dev_dbg(hdev->dev, "map address %#llx not found\n", addr);
return ERR_PTR(-EINVAL);
}
if (upper_32_bits(hnode->handle)) {
mutex_unlock(&ctx->mem_hash_lock);
dev_dbg(hdev->dev, "invalid handle %#llx for map address %#llx\n",
hnode->handle, addr);
return ERR_PTR(-EINVAL);
}
/*
* node found, increase export count so this memory cannot be unmapped
* and the hash node cannot be deleted.
*/
hnode->export_cnt++;
mutex_unlock(&ctx->mem_hash_lock);
return hnode;
}
static void memhash_node_export_put(struct hl_ctx *ctx, struct hl_vm_hash_node *hnode)
{
mutex_lock(&ctx->mem_hash_lock);
hnode->export_cnt--;
mutex_unlock(&ctx->mem_hash_lock);
}
static void hl_release_dmabuf(struct dma_buf *dmabuf)
{
struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv;
@ -1789,13 +1830,15 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
ctx = hl_dmabuf->ctx;
if (hl_dmabuf->memhash_hnode) {
mutex_lock(&ctx->mem_hash_lock);
hl_dmabuf->memhash_hnode->export_cnt--;
mutex_unlock(&ctx->mem_hash_lock);
}
if (hl_dmabuf->memhash_hnode)
memhash_node_export_put(ctx, hl_dmabuf->memhash_hnode);
atomic_dec(&ctx->hdev->dmabuf_export_cnt);
hl_ctx_put(ctx);
/* Paired with get_file() in export_dmabuf() */
fput(ctx->hpriv->filp);
kfree(hl_dmabuf);
}
@ -1834,6 +1877,13 @@ static int export_dmabuf(struct hl_ctx *ctx,
hl_dmabuf->ctx = ctx;
hl_ctx_get(hl_dmabuf->ctx);
atomic_inc(&ctx->hdev->dmabuf_export_cnt);
/* Get compute device file to enforce release order, such that all exported dma-buf will be
* released first and only then the compute device.
* Paired with fput() in hl_release_dmabuf().
*/
get_file(ctx->hpriv->filp);
*dmabuf_fd = fd;
@ -1933,47 +1983,6 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s
return 0;
}
static struct hl_vm_hash_node *memhash_node_export_get(struct hl_ctx *ctx, u64 addr)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm_hash_node *hnode;
/* get the memory handle */
mutex_lock(&ctx->mem_hash_lock);
hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)addr)
if (addr == hnode->vaddr)
break;
if (!hnode) {
mutex_unlock(&ctx->mem_hash_lock);
dev_dbg(hdev->dev, "map address %#llx not found\n", addr);
return ERR_PTR(-EINVAL);
}
if (upper_32_bits(hnode->handle)) {
mutex_unlock(&ctx->mem_hash_lock);
dev_dbg(hdev->dev, "invalid handle %#llx for map address %#llx\n",
hnode->handle, addr);
return ERR_PTR(-EINVAL);
}
/*
* node found, increase export count so this memory cannot be unmapped
* and the hash node cannot be deleted.
*/
hnode->export_cnt++;
mutex_unlock(&ctx->mem_hash_lock);
return hnode;
}
static void memhash_node_export_put(struct hl_ctx *ctx, struct hl_vm_hash_node *hnode)
{
mutex_lock(&ctx->mem_hash_lock);
hnode->export_cnt--;
mutex_unlock(&ctx->mem_hash_lock);
}
static struct hl_vm_phys_pg_pack *get_phys_pg_pack_from_hash_node(struct hl_device *hdev,
struct hl_vm_hash_node *hnode)
{
@ -2221,11 +2230,11 @@ static struct hl_mmap_mem_buf_behavior hl_ts_behavior = {
* allocate_timestamps_buffers() - allocate timestamps buffers
* This function will allocate ts buffer that will later on be mapped to the user
* in order to be able to read the timestamp.
* in additon it'll allocate an extra buffer for registration management.
* in addition it'll allocate an extra buffer for registration management.
* since we cannot fail during registration for out-of-memory situation, so
* we'll prepare a pool which will be used as user interrupt nodes and instead
* of dynamically allocating nodes while registration we'll pick the node from
* this pool. in addtion it'll add node to the mapping hash which will be used
* this pool. in addition it'll add node to the mapping hash which will be used
* to map user ts buffer to the internal kernel ts buffer.
* @hpriv: pointer to the private data of the fd
* @args: ioctl input


@ -275,7 +275,7 @@ int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
if (atomic_cmpxchg(&buf->mmap, 0, 1)) {
dev_err(mmg->dev,
"%s, Memory mmap failed, already mmaped to user\n",
"%s, Memory mmap failed, already mapped to user\n",
buf->behavior->topic);
rc = -EINVAL;
goto put_mem;
@ -341,8 +341,19 @@ void hl_mem_mgr_fini(struct hl_mem_mgr *mmg)
"%s: Buff handle %u for CTX is still alive\n",
topic, id);
}
}
/* TODO: can it happen that some buffer is still in use at this point? */
/**
* hl_mem_mgr_idr_destroy() - destroy memory manager IDR.
* @mmg: parent unified memory manager
*
* Destroy the memory manager IDR.
* Shall be called when IDR is empty and no memory buffers are in use.
*/
void hl_mem_mgr_idr_destroy(struct hl_mem_mgr *mmg)
{
if (!idr_is_empty(&mmg->handles))
dev_crit(mmg->dev, "memory manager IDR is destroyed while it is not empty!\n");
idr_destroy(&mmg->handles);
}


@ -540,8 +540,8 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
u32 page_off;
/*
* Bit arithmetics cannot be used for non power of two page
* sizes. In addition, since bit arithmetics is not used,
* Bit arithmetic cannot be used for non power of two page
* sizes. In addition, since bit arithmetic is not used,
* we cannot ignore dram base. All that shall be considered.
*/
@ -757,7 +757,7 @@ u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
* @mmu_prop: MMU properties.
* @hop_idx: HOP index.
* @hop_addr: HOP address.
* @virt_addr: virtual address fro the translation.
* @virt_addr: virtual address for the translation.
*
* @return the matching PTE value on success, otherwise U64_MAX.
*/


@ -502,7 +502,7 @@ free_glbl_sec:
int hl_init_pb_ranges_single_dcore(struct hl_device *hdev, u32 dcore_offset,
u32 num_instances, u32 instance_offset,
const u32 pb_blocks[], u32 blocks_array_size,
const struct range *regs_range_array, u32 regs_range_array_size)
const struct range *user_regs_range_array, u32 user_regs_range_array_size)
{
int i;
struct hl_block_glbl_sec *glbl_sec;
@ -514,8 +514,8 @@ int hl_init_pb_ranges_single_dcore(struct hl_device *hdev, u32 dcore_offset,
return -ENOMEM;
hl_secure_block(hdev, glbl_sec, blocks_array_size);
hl_unsecure_registers_range(hdev, regs_range_array,
regs_range_array_size, 0, pb_blocks, glbl_sec,
hl_unsecure_registers_range(hdev, user_regs_range_array,
user_regs_range_array_size, 0, pb_blocks, glbl_sec,
blocks_array_size);
/* Fill all blocks with the same configuration */


@ -10,7 +10,7 @@
#include <linux/io-64-nonatomic-lo-hi.h>
extern struct hl_device *hdev;
struct hl_device;
/* special blocks */
#define HL_MAX_NUM_OF_GLBL_ERR_CAUSE 10


@ -656,6 +656,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID;
prop->num_of_events = GAUDI_EVENT_SIZE;
prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
prop->tpc_enabled_mask = TPC_ENABLED_MASK;
set_default_power_values(hdev);
@ -679,6 +680,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
(num_sync_stream_queues * HL_RSVD_MONS);
prop->first_available_user_interrupt = USHRT_MAX;
prop->tpc_interrupt_id = USHRT_MAX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
@ -867,13 +869,18 @@ pci_init:
rc = hl_fw_read_preboot_status(hdev);
if (rc) {
if (hdev->reset_on_preboot_fail)
/* we are already on failure flow, so don't check if hw_fini fails. */
hdev->asic_funcs->hw_fini(hdev, true, false);
goto pci_fini;
}
if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->hw_fini(hdev, true, false);
rc = hdev->asic_funcs->hw_fini(hdev, true, false);
if (rc) {
dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
goto pci_fini;
}
}
return 0;
@ -3718,7 +3725,7 @@ static int gaudi_mmu_init(struct hl_device *hdev)
if (rc) {
dev_err(hdev->dev,
"failed to set hop0 addr for asid %d\n", i);
goto err;
return rc;
}
}
@ -3729,7 +3736,9 @@ static int gaudi_mmu_init(struct hl_device *hdev)
/* mem cache invalidation */
WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
hl_mmu_invalidate_cache(hdev, true, 0);
rc = hl_mmu_invalidate_cache(hdev, true, 0);
if (rc)
return rc;
WREG32(mmMMU_UP_MMU_ENABLE, 1);
WREG32(mmMMU_UP_SPI_MASK, 0xF);
@ -3745,9 +3754,6 @@ static int gaudi_mmu_init(struct hl_device *hdev)
gaudi->hw_cap_initialized |= HW_CAP_MMU;
return 0;
err:
return rc;
}
static int gaudi_load_firmware_to_device(struct hl_device *hdev)
@ -4068,7 +4074,7 @@ disable_queues:
return rc;
}
static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
struct cpu_dyn_regs *dyn_regs =
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
@ -4078,7 +4084,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset
if (!hard_reset) {
dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
return;
return 0;
}
if (hdev->pldm) {
@ -4199,10 +4205,10 @@ skip_reset:
msleep(reset_timeout_ms);
status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
dev_err(hdev->dev,
"Timeout while waiting for device to reset 0x%x\n",
status);
if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
return -ETIMEDOUT;
}
if (gaudi) {
gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
@ -4215,6 +4221,7 @@ skip_reset:
hdev->device_cpu_is_halted = false;
}
return 0;
}
static int gaudi_suspend(struct hl_device *hdev)
@ -7297,7 +7304,7 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
}
static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
bool razwi, u64 *event_mask)
bool check_razwi, u64 *event_mask)
{
bool is_read = false, is_write = false;
u16 engine_id[2], num_of_razwi_eng = 0;
@ -7316,7 +7323,7 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
event_type, desc);
if (razwi) {
if (check_razwi) {
gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
&is_write);
gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
@ -7333,8 +7340,9 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
num_of_razwi_eng = 1;
}
hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags,
event_mask);
if (razwi_flags)
hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
razwi_flags, event_mask);
}
}
@ -7633,6 +7641,7 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type,
static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct hl_info_fw_err_info fw_err_info;
u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
u32 fw_fatal_err_flag = 0, flags = 0;
@ -7911,7 +7920,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
case GAUDI_EVENT_FW_ALIVE_S:
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
fw_err_info.event_id = event_type;
fw_err_info.event_mask = &event_mask;
hl_handle_fw_err(hdev, &fw_err_info);
goto reset_device;
default:
@ -7942,6 +7954,10 @@ reset_device:
}
if (reset_required) {
/* escalate general hw errors to critical/fatal error */
if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
hl_handle_critical_hw_err(hdev, event_type, &event_mask);
hl_device_cond_reset(hdev, flags, event_mask);
} else {
hl_fw_unmask_irq(hdev, event_type);
@ -8403,19 +8419,26 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
}
mutex_lock(&hdev->mmu_lock);
rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
hdev->internal_cb_pool_dma_addr,
HOST_SPACE_INTERNAL_CB_SZ);
hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
mutex_unlock(&hdev->mmu_lock);
if (rc)
goto unreserve_internal_cb_pool;
rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
if (rc)
goto unmap_internal_cb_pool;
mutex_unlock(&hdev->mmu_lock);
return 0;
unmap_internal_cb_pool:
hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
mutex_unlock(&hdev->mmu_lock);
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:

File diff suppressed because it is too large


@ -387,6 +387,8 @@ enum gaudi2_edma_id {
* We have 64 CQ's per dcore, CQ0 in dcore 0 is reserved for legacy mode
*/
#define GAUDI2_NUM_USER_INTERRUPTS 255
#define GAUDI2_NUM_RESERVED_INTERRUPTS 1
#define GAUDI2_TOTAL_USER_INTERRUPTS (GAUDI2_NUM_USER_INTERRUPTS + GAUDI2_NUM_RESERVED_INTERRUPTS)
enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_EVENT_QUEUE = GAUDI2_EVENT_QUEUE_MSIX_IDX,
@ -410,12 +412,15 @@ enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_SHARED_DEC0_ABNRM,
GAUDI2_IRQ_NUM_SHARED_DEC1_NRM,
GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM,
GAUDI2_IRQ_NUM_DEC_LAST = GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM,
GAUDI2_IRQ_NUM_COMPLETION,
GAUDI2_IRQ_NUM_NIC_PORT_FIRST,
GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
GAUDI2_IRQ_NUM_TPC_ASSERT,
GAUDI2_IRQ_NUM_RESERVED_FIRST,
GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_NUM_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_USER_FIRST,
GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT,
GAUDI2_IRQ_NUM_USER_FIRST = GAUDI2_IRQ_NUM_UNEXPECTED_ERROR + 1,
GAUDI2_IRQ_NUM_USER_LAST = (GAUDI2_IRQ_NUM_USER_FIRST + GAUDI2_NUM_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_LAST = (GAUDI2_MSIX_ENTRIES - 1)
};


@ -2657,7 +2657,7 @@ int gaudi2_coresight_init(struct hl_device *hdev)
/*
* Mask out all the disabled binned offsets.
* so when user request to configure a binned or masked out component,
* driver will ignore programing it ( happens when offset value is set to 0x0 )
* driver will ignore programming it ( happens when offset value is set to 0x0 )
* this is being set in gaudi2_coresight_set_disabled_components
*/


@ -79,7 +79,6 @@
DCORE0_MME_CTRL_LO_ARCH_STATUS_QM_RDY_MASK)
#define TPC_IDLE_MASK (DCORE0_TPC0_CFG_STATUS_SCALAR_PIPE_EMPTY_MASK | \
DCORE0_TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK | \
DCORE0_TPC0_CFG_STATUS_IQ_EMPTY_MASK | \
DCORE0_TPC0_CFG_STATUS_SB_EMPTY_MASK | \
DCORE0_TPC0_CFG_STATUS_QM_IDLE_MASK | \
@ -87,6 +86,8 @@
#define DCORE0_TPC0_QM_CGM_STS_AGENT_IDLE_MASK 0x100
#define DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK 0x40
/* CGM_IDLE_MASK is valid for all engines CGM idle check */
#define CGM_IDLE_MASK DCORE0_TPC0_QM_CGM_STS_AGENT_IDLE_MASK


@ -1595,6 +1595,7 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = {
mmDCORE0_TPC0_CFG_KERNEL_SRF_30,
mmDCORE0_TPC0_CFG_KERNEL_SRF_31,
mmDCORE0_TPC0_CFG_TPC_SB_L0CD,
mmDCORE0_TPC0_CFG_TPC_ID,
mmDCORE0_TPC0_CFG_QM_KERNEL_ID_INC,
mmDCORE0_TPC0_CFG_QM_TID_BASE_SIZE_HIGH_DIM_0,
mmDCORE0_TPC0_CFG_QM_TID_BASE_SIZE_HIGH_DIM_1,


@ -472,6 +472,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
prop->first_available_user_interrupt = USHRT_MAX;
prop->tpc_interrupt_id = USHRT_MAX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
@ -668,13 +669,18 @@ pci_init:
rc = hl_fw_read_preboot_status(hdev);
if (rc) {
if (hdev->reset_on_preboot_fail)
/* we are already on failure flow, so don't check if hw_fini fails. */
hdev->asic_funcs->hw_fini(hdev, true, false);
goto pci_fini;
}
if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->hw_fini(hdev, true, false);
rc = hdev->asic_funcs->hw_fini(hdev, true, false);
if (rc) {
dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
goto pci_fini;
}
}
if (!hdev->pldm) {
@ -2782,7 +2788,7 @@ disable_queues:
return rc;
}
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
static int goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
struct goya_device *goya = hdev->asic_specific;
u32 reset_timeout_ms, cpu_timeout_ms, status;
@ -2828,17 +2834,17 @@ static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
msleep(reset_timeout_ms);
status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
dev_err(hdev->dev,
"Timeout while waiting for device to reset 0x%x\n",
status);
if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
return -ETIMEDOUT;
}
if (!hard_reset && goya) {
goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
HW_CAP_GOLDEN | HW_CAP_TPC);
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_SOFT_RESET);
return;
return 0;
}
/* Chicken bit to re-initiate boot sequencer flow */
@ -2857,6 +2863,7 @@ static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
memset(goya->events_stat, 0, sizeof(goya->events_stat));
}
return 0;
}
int goya_suspend(struct hl_device *hdev)


@ -357,6 +357,7 @@ struct hl_eq_addr_dec_intr_data {
struct hl_eq_entry {
struct hl_eq_header hdr;
union {
__le64 data_placeholder;
struct hl_eq_ecc_data ecc_data;
struct hl_eq_hbm_ecc_data hbm_ecc_data; /* Gaudi1 HBM */
struct hl_eq_sm_sei_data sm_sei_data;
@ -661,6 +662,9 @@ enum pq_init_status {
* CPUCP_PACKET_ACTIVE_STATUS_SET -
* LKD sends FW indication whether device is free or in use, this indication is reported
* also to the BMC.
*
* CPUCP_PACKET_REGISTER_INTERRUPTS -
* Packet to register interrupts indicating LKD is ready to receive events from FW.
*/
enum cpucp_packet_id {
@ -725,6 +729,8 @@ enum cpucp_packet_id {
CPUCP_PACKET_RESERVED9, /* not used */
CPUCP_PACKET_RESERVED10, /* not used */
CPUCP_PACKET_RESERVED11, /* not used */
CPUCP_PACKET_RESERVED12, /* internal */
CPUCP_PACKET_REGISTER_INTERRUPTS, /* internal */
CPUCP_PACKET_ID_MAX /* must be last */
};
@ -1127,6 +1133,7 @@ struct cpucp_security_info {
* (0 = functional 1 = binned)
* @interposer_version: Interposer version programmed in eFuse
* @substrate_version: Substrate version programmed in eFuse
* @fw_hbm_region_size: Size in bytes of FW reserved region in HBM.
* @fw_os_version: Firmware OS Version
*/
struct cpucp_info {
@ -1154,7 +1161,7 @@ struct cpucp_info {
__u8 substrate_version;
__u8 reserved2;
struct cpucp_security_info sec_info;
__le32 reserved3;
__le32 fw_hbm_region_size;
__u8 pll_map[PLL_MAP_LEN];
__le64 mme_binning_mask;
__u8 fw_os_version[VERSION_MAX_LEN];


@ -770,15 +770,23 @@ enum hl_components {
HL_COMPONENTS_ARMCP,
HL_COMPONENTS_CPLD,
HL_COMPONENTS_UBOOT,
HL_COMPONENTS_FUSE,
HL_COMPONENTS_MAX_NUM = 16
};
#define NAME_MAX_LEN 32 /* bytes */
struct hl_module_data {
__u8 name[NAME_MAX_LEN];
__u8 version[VERSION_MAX_LEN];
};
/**
* struct hl_component_versions - versions associated with hl component.
* @struct_size: size of all the struct (including dynamic size of modules).
* @modules_offset: offset of the modules field in this struct.
* @component: version of the component itself.
* @fw_os: Firmware OS Version.
* @comp_name: Name of the component.
* @modules_mask: i'th bit (from LSB) is a flag - on if module i in enum
* hl_modules is used.
* @modules_counter: number of set bits in modules_mask.
@ -791,45 +799,14 @@ struct hl_component_versions {
__le16 modules_offset;
__u8 component[VERSION_MAX_LEN];
__u8 fw_os[VERSION_MAX_LEN];
__u8 comp_name[NAME_MAX_LEN];
__le16 modules_mask;
__u8 modules_counter;
__u8 reserved[1];
__u8 modules[][VERSION_MAX_LEN];
struct hl_module_data modules[];
};
/**
* struct hl_fw_versions - all versions (fuse, cpucp's components with their
* modules)
* @struct_size: size of all the struct (including dynamic size of components).
* @components_offset: offset of the components field in this struct.
* @fuse: silicon production FUSE information.
* @components_mask: i'th bit (from LSB) is a flag - on if component i in enum
* hl_components is used.
* @components_counter: number of set bits in components_mask.
* @reserved: reserved for future use.
* @components: versions of hl components. Index i corresponds to the i'th bit
* that is *on* in components_mask. For example, if
* components_mask=0b101, then *components represents arcpid and
* *(hl_component_versions*)((char*)components + 1') represents
* preboot, where 1' = components[0].struct_size.
*/
struct hl_fw_versions {
__le16 struct_size;
__le16 components_offset;
__u8 fuse[VERSION_MAX_LEN];
__le16 components_mask;
__u8 components_counter;
__u8 reserved[1];
struct hl_component_versions components[];
};
/* Max size of struct hl_component_versions */
#define HL_COMPONENT_VERSIONS_MAX_SIZE \
(sizeof(struct hl_component_versions) + HL_MODULES_MAX_NUM * \
VERSION_MAX_LEN)
/* Max size of struct hl_fw_versions */
#define HL_FW_VERSIONS_MAX_SIZE (sizeof(struct hl_fw_versions) + \
HL_COMPONENTS_MAX_NUM * HL_COMPONENT_VERSIONS_MAX_SIZE)
/* Max size of fit size */
#define HL_FW_VERSIONS_FIT_SIZE 4096
#endif /* HL_BOOT_IF_H */


@ -164,6 +164,8 @@
#define mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR 0x4800040
#define mmDCORE0_TPC0_EML_CFG_DBG_CNT 0x40000
#define SM_OBJS_PROT_BITS_OFFS 0x14000
#define DCORE_OFFSET (mmDCORE1_TPC0_QM_BASE - mmDCORE0_TPC0_QM_BASE)
@ -185,7 +187,10 @@
#define TPC_CFG_STALL_ON_ERR_OFFSET (mmDCORE0_TPC0_CFG_STALL_ON_ERR - mmDCORE0_TPC0_CFG_BASE)
#define TPC_CFG_TPC_INTR_MASK_OFFSET (mmDCORE0_TPC0_CFG_TPC_INTR_MASK - mmDCORE0_TPC0_CFG_BASE)
#define TPC_CFG_MSS_CONFIG_OFFSET (mmDCORE0_TPC0_CFG_MSS_CONFIG - mmDCORE0_TPC0_CFG_BASE)
#define TPC_EML_CFG_DBG_CNT_OFFSET (mmDCORE0_TPC0_EML_CFG_DBG_CNT - mmDCORE0_TPC0_EML_CFG_BASE)
#define EDMA_CORE_CFG_STALL_OFFSET (mmDCORE0_EDMA0_CORE_CFG_1 - mmDCORE0_EDMA0_CORE_BASE)
#define MME_CTRL_LO_QM_STALL_OFFSET (mmDCORE0_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_BASE)
#define MME_ACC_INTR_MASK_OFFSET (mmDCORE0_MME_ACC_INTR_MASK - mmDCORE0_MME_ACC_BASE)
#define MME_ACC_WR_AXI_AGG_COUT0_OFFSET (mmDCORE0_MME_ACC_WR_AXI_AGG_COUT0 - mmDCORE0_MME_ACC_BASE)
#define MME_ACC_WR_AXI_AGG_COUT1_OFFSET (mmDCORE0_MME_ACC_WR_AXI_AGG_COUT1 - mmDCORE0_MME_ACC_BASE)


@ -63,6 +63,8 @@
#define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START 0xFFF0F80000000000ull
#define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END 0xFFF0FFFFFFFFFFFFull
#define RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT 256
#define GAUDI2_MSIX_ENTRIES 512
#define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */


@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2018-2021 HabanaLabs, Ltd.
* Copyright 2018-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@ -958,7 +958,7 @@ enum gaudi2_async_event_id {
GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1 = 1318,
GAUDI2_EVENT_ARC_DCCM_FULL = 1319,
GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320,
GAUDI2_EVENT_DEV_RESET_REQ = 1321,
GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321,
GAUDI2_EVENT_SIZE,
};


@ -63,7 +63,10 @@ struct gaudi2_cold_rst_data {
u32 fake_sig_validation_en : 1;
u32 bist_skip_enable : 1;
u32 bist_need_iatu_config : 1;
u32 reserved : 24;
u32 fake_bis_compliant : 1;
u32 wd_rst_cause_arm : 1;
u32 wd_rst_cause_arcpid : 1;
u32 reserved : 21;
};
__le32 data;
};


@ -408,7 +408,8 @@ static inline bool drm_is_render_client(const struct drm_file *file_priv)
* Returns true if this is an open file of the compute acceleration node, i.e.
* &drm_file.minor of @file_priv is a accel minor.
*
* See also the :ref:`section on accel nodes <drm_accel_node>`.
* See also :doc:`Introduction to compute accelerators subsystem
* </accel/introduction>`.
*/
static inline bool drm_is_accel_client(const struct drm_file *file_priv)
{


@ -723,6 +723,10 @@ enum hl_server_type {
* HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error
* HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened
* HL_NOTIFIER_EVENT_PAGE_FAULT - Indicates page fault happened
* HL_NOTIFIER_EVENT_CRITICAL_HW_ERR - Indicates a HW error that requires SW abort and
* HW reset
* HL_NOTIFIER_EVENT_CRITICAL_FW_ERR - Indicates a FW error that requires SW abort and
* HW reset
*/
#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0)
#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1)
@ -733,6 +737,8 @@ enum hl_server_type {
#define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6)
#define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7)
#define HL_NOTIFIER_EVENT_PAGE_FAULT (1ULL << 8)
#define HL_NOTIFIER_EVENT_CRITICL_HW_ERR (1ULL << 9)
#define HL_NOTIFIER_EVENT_CRITICL_FW_ERR (1ULL << 10)
/* Opcode for management ioctl
*
@ -790,6 +796,8 @@ enum hl_server_type {
* HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault.
* HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event.
* HL_INFO_FW_GENERIC_REQ - Send generic request to FW.
* HL_INFO_HW_ERR_EVENT - Retrieve information on the reported HW error.
* HL_INFO_FW_ERR_EVENT - Retrieve information on the reported FW error.
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
@ -824,6 +832,8 @@ enum hl_server_type {
#define HL_INFO_PAGE_FAULT_EVENT 33
#define HL_INFO_USER_MAPPINGS 34
#define HL_INFO_FW_GENERIC_REQ 35
#define HL_INFO_HW_ERR_EVENT 36
#define HL_INFO_FW_ERR_EVENT 37
#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
@ -875,6 +885,12 @@ enum hl_server_type {
* application to use. Relevant for Gaudi2 and later.
* @device_mem_alloc_default_page_size: default page size used in device memory allocation.
* @revision_id: PCI revision ID of the ASIC.
* @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host.
* @rotator_enabled_mask: Bit-mask that represents which rotators are enabled.
* Relevant for Gaudi3 and later.
* @engine_core_interrupt_reg_addr: interrupt register address for engine core to use
* in order to raise events toward FW.
* @reserved_dram_size: DRAM size reserved for driver and firmware.
*/
struct hl_info_hw_ip_info {
__u64 sram_base_address;
@ -902,15 +918,20 @@ struct hl_info_hw_ip_info {
__u64 dram_page_size;
__u32 edma_enabled_mask;
__u16 number_of_user_interrupts;
__u16 pad2;
__u64 reserved4;
__u8 reserved1;
__u8 reserved2;
__u64 reserved3;
__u64 device_mem_alloc_default_page_size;
__u64 reserved4;
__u64 reserved5;
__u64 reserved6;
__u32 reserved7;
__u8 reserved8;
__u32 reserved6;
__u8 reserved7;
__u8 revision_id;
__u8 pad[2];
__u16 tpc_interrupt_id;
__u32 rotator_enabled_mask;
__u32 reserved9;
__u64 engine_core_interrupt_reg_addr;
__u64 reserved_dram_size;
};
struct hl_info_dram_usage {
@ -1161,6 +1182,39 @@ struct hl_info_undefined_opcode_event {
__u32 stream_id;
};
/**
* struct hl_info_hw_err_event - info about HW error
* @timestamp: timestamp of error occurrence
* @event_id: The async event ID (specific to each device type).
* @pad: size padding for u64 granularity.
*/
struct hl_info_hw_err_event {
__s64 timestamp;
__u16 event_id;
__u16 pad[3];
};
/* FW error definition for err_type in struct hl_info_fw_err_event */
enum hl_info_fw_err_type {
HL_INFO_FW_HEARTBEAT_ERR,
HL_INFO_FW_REPORTED_ERR,
};
/**
* struct hl_info_fw_err_event - info about FW error
* @timestamp: time-stamp of error occurrence
* @err_type: The type of event as defined in hl_info_fw_err_type.
* @event_id: The async event ID (specific to each device type, applicable only when event type is
* HL_INFO_FW_REPORTED_ERR).
* @pad: size padding for u64 granularity.
*/
struct hl_info_fw_err_event {
__s64 timestamp;
__u16 err_type;
__u16 event_id;
__u32 pad;
};
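A minimal user-space sketch of reading the last recorded f/w error via the new opcode; it assumes the standard habanalabs INFO ioctl plumbing (HL_IOCTL_INFO and the return_pointer/return_size/op fields of struct hl_info_args, which this hunk does not show) and an already-open device fd:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* uapi header; install path may differ */

/* returns 0 on success; fails with errno ENOENT when no f/w error was recorded */
static int read_last_fw_error(int fd, struct hl_info_fw_err_event *ev)
{
	struct hl_info_args args;

	memset(&args, 0, sizeof(args));
	memset(ev, 0, sizeof(*ev));

	args.op = HL_INFO_FW_ERR_EVENT;
	args.return_pointer = (__u64) (uintptr_t) ev;
	args.return_size = sizeof(*ev);

	return ioctl(fd, HL_IOCTL_INFO, &args);
}

Fetching the HW error event works the same way, using HL_INFO_HW_ERR_EVENT and struct hl_info_hw_err_event.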
/**
* struct hl_info_dev_memalloc_page_sizes - valid page sizes in device mem alloc information.
* @page_order_bitmask: bitmap in which a set bit represents the order of the supported page size
@ -1486,17 +1540,31 @@ struct hl_cs_chunk {
*/
#define HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES 0x8000
/*
* The engines CS is merged into the existing CS ioctls.
* Use it to control engines modes.
*/
#define HL_CS_FLAGS_ENGINES_COMMAND 0x10000
#define HL_CS_STATUS_SUCCESS 0
#define HL_MAX_JOBS_PER_CS 512
/* HL_ENGINE_CORE_ values
/*
* enum hl_engine_command - engine command
*
* HL_ENGINE_CORE_HALT: engine core halt
* HL_ENGINE_CORE_RUN: engine core run
* @HL_ENGINE_CORE_HALT: engine core halt
* @HL_ENGINE_CORE_RUN: engine core run
* @HL_ENGINE_STALL: user engine/s stall
* @HL_ENGINE_RESUME: user engine/s resume
*/
#define HL_ENGINE_CORE_HALT (1 << 0)
#define HL_ENGINE_CORE_RUN (1 << 1)
enum hl_engine_command {
HL_ENGINE_CORE_HALT = 1,
HL_ENGINE_CORE_RUN = 2,
HL_ENGINE_STALL = 3,
HL_ENGINE_RESUME = 4,
HL_ENGINE_COMMAND_MAX
};
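These commands ride on the existing CS ioctl; a minimal user-space sketch, assuming the usual CS ioctl entry points (HL_IOCTL_CS, union hl_cs_args and the cs_flags field of hl_cs_in, none of which are shown in this hunk), that stalls a list of engines through the engines/num_engines/engine_command fields added to struct hl_cs_in below:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* uapi header; install path may differ */

/* illustrative only: stall the user engines whose ids are listed in engine_ids */
static int stall_engines(int fd, __u32 *engine_ids, __u32 num_engines)
{
	union hl_cs_args args;

	memset(&args, 0, sizeof(args));

	/* mark this CS as an engines command rather than a regular submission */
	args.in.cs_flags = HL_CS_FLAGS_ENGINES_COMMAND;
	args.in.engines = (__u64) (uintptr_t) engine_ids;
	args.in.num_engines = num_engines;
	args.in.engine_command = HL_ENGINE_STALL;

	return ioctl(fd, HL_IOCTL_CS, &args);
}

Resuming the same engines uses the identical flow with HL_ENGINE_RESUME.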
struct hl_cs_in {
@ -1520,6 +1588,18 @@ struct hl_cs_in {
/* the core command to be sent towards engine cores */
__u32 core_command;
};
/* Valid only when HL_CS_FLAGS_ENGINES_COMMAND is set */
struct {
/* this holds address of array of uint32 for engines */
__u64 engines;
/* number of engines in engines array */
__u32 num_engines;
/* the engine command to be sent towards engines */
__u32 engine_command;
};
};
union {