From 0ca3b1b7b9652875861eaa24ae2c514771c2750e Mon Sep 17 00:00:00 2001 From: Igor Grinberg Date: Sun, 24 Feb 2019 11:20:02 +0200 Subject: [PATCH 01/31] habanalabs: add new device CPU boot status This patch adds a definition of a new status in the device CPU boot stages and add the handling of the new status. Signed-off-by: Igor Grinberg Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 5 +++++ drivers/misc/habanalabs/include/hl_boot_if.h | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index ea979ebd62fb..0185f11c5577 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2550,6 +2550,11 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) "ARM status %d - DDR initialization failed\n", status); break; + case CPU_BOOT_STATUS_UBOOT_NOT_READY: + dev_err(hdev->dev, + "ARM status %d - u-boot stopped by user\n", + status); + break; default: dev_err(hdev->dev, "ARM status %d - Invalid status code\n", diff --git a/drivers/misc/habanalabs/include/hl_boot_if.h b/drivers/misc/habanalabs/include/hl_boot_if.h index 7475732b9996..4cd04c090285 100644 --- a/drivers/misc/habanalabs/include/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/hl_boot_if.h @@ -18,7 +18,8 @@ enum cpu_boot_status { CPU_BOOT_STATUS_IN_SPL, CPU_BOOT_STATUS_IN_UBOOT, CPU_BOOT_STATUS_DRAM_INIT_FAIL, - CPU_BOOT_STATUS_FIT_CORRUPTED + CPU_BOOT_STATUS_FIT_CORRUPTED, + CPU_BOOT_STATUS_UBOOT_NOT_READY, }; enum kmd_msg { From b24ca4587e2b940b22de4490209b8d785bf487a5 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 24 Feb 2019 15:50:53 +0200 Subject: [PATCH 02/31] habanalabs: rename goya_non_fatal_events array to all events The goya_non_fatal_events array actually contains all the possible events the driver can receive from the F/W. Therefore, use a proper name for the array. The patch also adds missing event Ids to the goya_async_event_id enum. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 8 +++----- drivers/misc/habanalabs/include/goya/goya_async_events.h | 9 +++++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 0185f11c5577..3dfac5393269 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -176,9 +176,7 @@ static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = { mmMME_WBC_CONTROL_DATA }; -#define GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE 121 - -static u32 goya_non_fatal_events[GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE] = { +static u32 goya_all_events[] = { GOYA_ASYNC_EVENT_ID_PCIE_IF, GOYA_ASYNC_EVENT_ID_TPC0_ECC, GOYA_ASYNC_EVENT_ID_TPC1_ECC, @@ -4627,8 +4625,8 @@ static int goya_soft_reset_late_init(struct hl_device *hdev) * Unmask all IRQs since some could have been received * during the soft reset */ - return goya_unmask_irq_arr(hdev, goya_non_fatal_events, - sizeof(goya_non_fatal_events)); + return goya_unmask_irq_arr(hdev, goya_all_events, + sizeof(goya_all_events)); } static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) diff --git a/drivers/misc/habanalabs/include/goya/goya_async_events.h b/drivers/misc/habanalabs/include/goya/goya_async_events.h index 497937a17ee9..bb7a1aa3279e 100644 --- a/drivers/misc/habanalabs/include/goya/goya_async_events.h +++ b/drivers/misc/habanalabs/include/goya/goya_async_events.h @@ -9,7 +9,9 @@ #define __GOYA_ASYNC_EVENTS_H_ enum goya_async_event_id { + GOYA_ASYNC_EVENT_ID_PCIE_CORE = 32, GOYA_ASYNC_EVENT_ID_PCIE_IF = 33, + GOYA_ASYNC_EVENT_ID_PCIE_PHY = 34, GOYA_ASYNC_EVENT_ID_TPC0_ECC = 36, GOYA_ASYNC_EVENT_ID_TPC1_ECC = 39, GOYA_ASYNC_EVENT_ID_TPC2_ECC = 42, @@ -23,6 +25,8 @@ enum goya_async_event_id { GOYA_ASYNC_EVENT_ID_MMU_ECC = 63, GOYA_ASYNC_EVENT_ID_DMA_MACRO = 64, GOYA_ASYNC_EVENT_ID_DMA_ECC = 66, + GOYA_ASYNC_EVENT_ID_DDR0_PARITY = 69, + GOYA_ASYNC_EVENT_ID_DDR1_PARITY = 72, GOYA_ASYNC_EVENT_ID_CPU_IF_ECC = 75, GOYA_ASYNC_EVENT_ID_PSOC_MEM = 78, GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT = 79, @@ -72,6 +76,7 @@ enum goya_async_event_id { GOYA_ASYNC_EVENT_ID_MME_WACSD = 142, GOYA_ASYNC_EVENT_ID_PLL0 = 143, GOYA_ASYNC_EVENT_ID_PLL1 = 144, + GOYA_ASYNC_EVENT_ID_PLL2 = 145, GOYA_ASYNC_EVENT_ID_PLL3 = 146, GOYA_ASYNC_EVENT_ID_PLL4 = 147, GOYA_ASYNC_EVENT_ID_PLL5 = 148, @@ -81,6 +86,7 @@ enum goya_async_event_id { GOYA_ASYNC_EVENT_ID_PSOC = 160, GOYA_ASYNC_EVENT_ID_PCIE_FLR = 171, GOYA_ASYNC_EVENT_ID_PCIE_HOT_RESET = 172, + GOYA_ASYNC_EVENT_ID_PCIE_PERST = 173, GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG0 = 174, GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG1 = 175, GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG2 = 176, @@ -144,8 +150,11 @@ enum goya_async_event_id { GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_0 = 330, GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_1 = 331, GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_2 = 332, + GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_3 = 333, + GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_4 = 334, GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET = 356, GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT = 361, + GOYA_ASYNC_EVENT_ID_FAN = 425, GOYA_ASYNC_EVENT_ID_TPC0_CMDQ = 430, GOYA_ASYNC_EVENT_ID_TPC1_CMDQ = 431, GOYA_ASYNC_EVENT_ID_TPC2_CMDQ = 432, From 5eb420446a595ee111828641bff1f3a125fc5c0d Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Wed, 27 Feb 2019 09:55:57 +0200 Subject: [PATCH 03/31] habanalabs: remove implicit include from header files This will prevent unneeded include of header files, which may increase compilation time. Signed-off-by: Dotan Barak Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya_security.c | 1 + drivers/misc/habanalabs/include/goya/goya.h | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya_security.c b/drivers/misc/habanalabs/goya/goya_security.c index 575003238401..9d2dee67e46f 100644 --- a/drivers/misc/habanalabs/goya/goya_security.c +++ b/drivers/misc/habanalabs/goya/goya_security.c @@ -6,6 +6,7 @@ */ #include "goyaP.h" +#include "include/goya/asic_reg/goya_regs.h" /* * goya_set_block_as_protected - set the given block as protected diff --git a/drivers/misc/habanalabs/include/goya/goya.h b/drivers/misc/habanalabs/include/goya/goya.h index 614149efa412..3f02a52ba4ce 100644 --- a/drivers/misc/habanalabs/include/goya/goya.h +++ b/drivers/misc/habanalabs/include/goya/goya.h @@ -8,10 +8,6 @@ #ifndef GOYA_H #define GOYA_H -#include "asic_reg/goya_regs.h" - -#include - #define SRAM_CFG_BAR_ID 0 #define MSIX_BAR_ID 2 #define DDR_BAR_ID 4 From 3110c60fdc7a5a7626b7cd401c4918751d7c19db Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 4 Mar 2019 10:22:09 +0200 Subject: [PATCH 04/31] habanalabs: Move device CPU code into common file This patch moves the code that is responsible of the communication vs. the F/W to a dedicated file. This will allow us to share the code between different ASICs. Signed-off-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/Makefile | 2 +- drivers/misc/habanalabs/firmware_if.c | 325 +++++++++++++++++++ drivers/misc/habanalabs/goya/goya.c | 361 ++++----------------- drivers/misc/habanalabs/goya/goyaP.h | 11 - drivers/misc/habanalabs/habanalabs.h | 17 + drivers/misc/habanalabs/include/armcp_if.h | 8 + 6 files changed, 408 insertions(+), 316 deletions(-) create mode 100644 drivers/misc/habanalabs/firmware_if.c diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile index c6592db59b25..598740d235f8 100644 --- a/drivers/misc/habanalabs/Makefile +++ b/drivers/misc/habanalabs/Makefile @@ -6,7 +6,7 @@ obj-m := habanalabs.o habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \ command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \ - command_submission.o mmu.o + command_submission.o mmu.o firmware_if.o habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c new file mode 100644 index 000000000000..1acf82650b20 --- /dev/null +++ b/drivers/misc/habanalabs/firmware_if.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include +#include +#include + +/** + * hl_fw_push_fw_to_device() - Push FW code to device. + * @hdev: pointer to hl_device structure. + * + * Copy fw code from firmware file to device memory. + * + * Return: 0 on success, non-zero for failure. + */ +int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name, + void __iomem *dst) +{ + const struct firmware *fw; + const u64 *fw_data; + size_t fw_size, i; + int rc; + + rc = request_firmware(&fw, fw_name, hdev->dev); + if (rc) { + dev_err(hdev->dev, "Failed to request %s\n", fw_name); + goto out; + } + + fw_size = fw->size; + if ((fw_size % 4) != 0) { + dev_err(hdev->dev, "illegal %s firmware size %zu\n", + fw_name, fw_size); + rc = -EINVAL; + goto out; + } + + dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size); + + fw_data = (const u64 *) fw->data; + + if ((fw->size % 8) != 0) + fw_size -= 8; + + for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) { + if (!(i & (0x80000 - 1))) { + dev_dbg(hdev->dev, + "copied so far %zu out of %zu for %s firmware", + i, fw_size, fw_name); + usleep_range(20, 100); + } + + writeq(*fw_data, dst); + } + + if ((fw->size % 8) != 0) + writel(*(const u32 *) fw_data, dst); + +out: + release_firmware(fw); + return rc; +} + +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) +{ + struct armcp_packet pkt = {}; + + pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); + + return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, + sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); +} + +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, + u16 len, u32 timeout, long *result) +{ + struct armcp_packet *pkt; + dma_addr_t pkt_dma_addr; + u32 tmp; + int rc = 0; + + if (len > HL_CPU_CB_SIZE) { + dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n", + len); + return -ENOMEM; + } + + pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, + &pkt_dma_addr); + if (!pkt) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for packet to CPU\n"); + return -ENOMEM; + } + + memcpy(pkt, msg, len); + + mutex_lock(&hdev->send_cpu_message_lock); + + if (hdev->disabled) + goto out; + + if (hdev->device_cpu_disabled) { + rc = -EIO; + goto out; + } + + rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr); + if (rc) { + dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); + goto out; + } + + rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence, + timeout, &tmp); + + hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); + + if (rc == -ETIMEDOUT) { + dev_err(hdev->dev, "Timeout while waiting for device CPU\n"); + hdev->device_cpu_disabled = true; + goto out; + } + + if (tmp == ARMCP_PACKET_FENCE_VAL) { + u32 ctl = le32_to_cpu(pkt->ctl); + + rc = (ctl & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT; + if (rc) { + dev_err(hdev->dev, + "F/W ERROR %d for CPU packet %d\n", + rc, (ctl & ARMCP_PKT_CTL_OPCODE_MASK) + >> ARMCP_PKT_CTL_OPCODE_SHIFT); + rc = -EINVAL; + } else if (result) { + *result = (long) le64_to_cpu(pkt->result); + } + } else { + dev_err(hdev->dev, "CPU packet wrong fence value\n"); + rc = -EINVAL; + } + +out: + mutex_unlock(&hdev->send_cpu_message_lock); + + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt); + + return rc; +} + +int hl_fw_test_cpu_queue(struct hl_device *hdev) +{ + struct armcp_packet test_pkt = {}; + long result; + int rc; + + test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << + ARMCP_PKT_CTL_OPCODE_SHIFT); + test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, + sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + + if (!rc) { + if (result == ARMCP_PACKET_FENCE_VAL) + dev_info(hdev->dev, + "queue test on CPU queue succeeded\n"); + else + dev_err(hdev->dev, + "CPU queue test failed (0x%08lX)\n", result); + } else { + dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc); + } + + return rc; +} + +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle) +{ + u64 kernel_addr; + + /* roundup to HL_CPU_PKT_SIZE */ + size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK; + + kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size); + + *dma_handle = hdev->cpu_accessible_dma_address + + (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem); + + return (void *) (uintptr_t) kernel_addr; +} + +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, + void *vaddr) +{ + /* roundup to HL_CPU_PKT_SIZE */ + size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK; + + gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr, + size); +} + +int hl_fw_send_heartbeat(struct hl_device *hdev) +{ + struct armcp_packet hb_pkt = {}; + long result; + int rc; + + hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << + ARMCP_PKT_CTL_OPCODE_SHIFT); + hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, + sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + + if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) + rc = -EIO; + + return rc; +} + +int hl_fw_armcp_info_get(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct armcp_packet pkt = {}; + void *armcp_info_cpu_addr; + dma_addr_t armcp_info_dma_addr; + long result; + int rc; + + armcp_info_cpu_addr = + hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + sizeof(struct armcp_info), + &armcp_info_dma_addr); + if (!armcp_info_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for ArmCP info packet\n"); + return -ENOMEM; + } + + memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(armcp_info_dma_addr + + prop->host_phys_base_address); + pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info)); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_ARMCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to send armcp info pkt, error %d\n", rc); + goto out; + } + + memcpy(&prop->armcp_info, armcp_info_cpu_addr, + sizeof(prop->armcp_info)); + + rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors); + if (rc) { + dev_err(hdev->dev, + "Failed to build hwmon channel info, error %d\n", rc); + rc = -EFAULT; + goto out; + } + +out: + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + sizeof(struct armcp_info), armcp_info_cpu_addr); + + return rc; +} + +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct armcp_packet pkt = {}; + void *eeprom_info_cpu_addr; + dma_addr_t eeprom_info_dma_addr; + long result; + int rc; + + eeprom_info_cpu_addr = + hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + max_size, &eeprom_info_dma_addr); + if (!eeprom_info_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for EEPROM info packet\n"); + return -ENOMEM; + } + + memset(eeprom_info_cpu_addr, 0, max_size); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(eeprom_info_dma_addr + + prop->host_phys_base_address); + pkt.data_max_size = cpu_to_le32(max_size); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_ARMCP_EEPROM_TIMEOUT_USEC, &result); + + if (rc) { + dev_err(hdev->dev, + "Failed to send armcp EEPROM pkt, error %d\n", rc); + goto out; + } + + /* result contains the actual size */ + memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size)); + +out: + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size, + eeprom_info_cpu_addr); + + return rc; +} diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3dfac5393269..30f83521de90 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -12,10 +12,8 @@ #include #include -#include #include #include -#include /* * GOYA security scheme: @@ -373,18 +371,6 @@ static void goya_get_fixed_properties(struct hl_device *hdev) prop->high_pll = PLL_HIGH_DEFAULT; } -int goya_send_pci_access_msg(struct hl_device *hdev, u32 opcode) -{ - struct armcp_packet pkt; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); - - return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, - sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); -} - /* * goya_pci_bars_map - Map PCI BARS of Goya device * @@ -802,7 +788,7 @@ static int goya_late_init(struct hl_device *hdev) */ WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size)); - rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); if (rc) { dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); return rc; @@ -828,7 +814,7 @@ static int goya_late_init(struct hl_device *hdev) return 0; disable_pci_access: - goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); return rc; } @@ -900,19 +886,16 @@ static int goya_sw_init(struct hl_device *hdev) hdev->cpu_accessible_dma_mem = hdev->asic_funcs->dma_alloc_coherent(hdev, - CPU_ACCESSIBLE_MEM_SIZE, + HL_CPU_ACCESSIBLE_MEM_SIZE, &hdev->cpu_accessible_dma_address, GFP_KERNEL | __GFP_ZERO); if (!hdev->cpu_accessible_dma_mem) { - dev_err(hdev->dev, - "failed to allocate %d of dma memory for CPU accessible memory space\n", - CPU_ACCESSIBLE_MEM_SIZE); rc = -ENOMEM; goto free_dma_pool; } - hdev->cpu_accessible_dma_pool = gen_pool_create(CPU_PKT_SHIFT, -1); + hdev->cpu_accessible_dma_pool = gen_pool_create(HL_CPU_PKT_SHIFT, -1); if (!hdev->cpu_accessible_dma_pool) { dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n"); @@ -922,7 +905,7 @@ static int goya_sw_init(struct hl_device *hdev) rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem, - CPU_ACCESSIBLE_MEM_SIZE, -1); + HL_CPU_ACCESSIBLE_MEM_SIZE, -1); if (rc) { dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n"); @@ -937,7 +920,7 @@ static int goya_sw_init(struct hl_device *hdev) free_cpu_pq_pool: gen_pool_destroy(hdev->cpu_accessible_dma_pool); free_cpu_pq_dma_mem: - hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE, + hdev->asic_funcs->dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, hdev->cpu_accessible_dma_address); free_dma_pool: @@ -960,7 +943,7 @@ static int goya_sw_fini(struct hl_device *hdev) gen_pool_destroy(hdev->cpu_accessible_dma_pool); - hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE, + hdev->asic_funcs->dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, hdev->cpu_accessible_dma_address); @@ -1240,7 +1223,7 @@ static int goya_init_cpu_queues(struct hl_device *hdev) WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES); WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, CPU_ACCESSIBLE_MEM_SIZE); + WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, HL_CPU_ACCESSIBLE_MEM_SIZE); /* Used for EQ CI */ WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0); @@ -2328,67 +2311,45 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) } /* - * goya_push_fw_to_device - Push FW code to device + * goya_push_uboot_to_device() - Push u-boot FW code to device. + * @hdev: Pointer to hl_device structure. * - * @hdev: pointer to hl_device structure - * - * Copy fw code from firmware file to device memory. - * Returns 0 on success + * Copy u-boot fw code from firmware file to SRAM BAR. * + * Return: 0 on success, non-zero for failure. */ -static int goya_push_fw_to_device(struct hl_device *hdev, const char *fw_name, - void __iomem *dst) +static int goya_push_uboot_to_device(struct hl_device *hdev) { - const struct firmware *fw; - const u64 *fw_data; - size_t fw_size, i; - int rc; + char fw_name[200]; + void __iomem *dst; - rc = request_firmware(&fw, fw_name, hdev->dev); + snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin"); + dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET; - if (rc) { - dev_err(hdev->dev, "Failed to request %s\n", fw_name); - goto out; - } + return hl_fw_push_fw_to_device(hdev, fw_name, dst); +} - fw_size = fw->size; - if ((fw_size % 4) != 0) { - dev_err(hdev->dev, "illegal %s firmware size %zu\n", - fw_name, fw_size); - rc = -EINVAL; - goto out; - } +/* + * goya_push_linux_to_device() - Push LINUX FW code to device. + * @hdev: Pointer to hl_device structure. + * + * Copy LINUX fw code from firmware file to HBM BAR. + * + * Return: 0 on success, non-zero for failure. + */ +static int goya_push_linux_to_device(struct hl_device *hdev) +{ + char fw_name[200]; + void __iomem *dst; - dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size); + snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb"); + dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET; - fw_data = (const u64 *) fw->data; - - if ((fw->size % 8) != 0) - fw_size -= 8; - - for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) { - if (!(i & (0x80000 - 1))) { - dev_dbg(hdev->dev, - "copied so far %zu out of %zu for %s firmware", - i, fw_size, fw_name); - usleep_range(20, 100); - } - - writeq(*fw_data, dst); - } - - if ((fw->size % 8) != 0) - writel(*(const u32 *) fw_data, dst); - -out: - release_firmware(fw); - return rc; + return hl_fw_push_fw_to_device(hdev, fw_name, dst); } static int goya_pldm_init_cpu(struct hl_device *hdev) { - char fw_name[200]; - void __iomem *dst; u32 val, unit_rst_val; int rc; @@ -2406,15 +2367,11 @@ static int goya_pldm_init_cpu(struct hl_device *hdev) WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val); val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N); - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin"); - dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET; - rc = goya_push_fw_to_device(hdev, fw_name, dst); + rc = goya_push_uboot_to_device(hdev); if (rc) return rc; - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb"); - dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET; - rc = goya_push_fw_to_device(hdev, fw_name, dst); + rc = goya_push_linux_to_device(hdev); if (rc) return rc; @@ -2476,8 +2433,6 @@ static void goya_read_device_fw_version(struct hl_device *hdev, static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) { struct goya_device *goya = hdev->asic_specific; - char fw_name[200]; - void __iomem *dst; u32 status; int rc; @@ -2574,9 +2529,7 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) goto out; } - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb"); - dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET; - rc = goya_push_fw_to_device(hdev, fw_name, dst); + rc = goya_push_linux_to_device(hdev); if (rc) return rc; @@ -2762,7 +2715,7 @@ static int goya_hw_init(struct hl_device *hdev) return 0; disable_pci_access: - goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); disable_msix: goya_disable_msix(hdev); disable_queues: @@ -2869,7 +2822,7 @@ int goya_suspend(struct hl_device *hdev) { int rc; - rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); if (rc) dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); @@ -3150,10 +3103,6 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, u32 timeout, long *result) { struct goya_device *goya = hdev->asic_specific; - struct armcp_packet *pkt; - dma_addr_t pkt_dma_addr; - u32 tmp; - int rc = 0; if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) { if (result) @@ -3161,74 +3110,8 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, return 0; } - if (len > CPU_CB_SIZE) { - dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n", - len); - return -ENOMEM; - } - - pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, - &pkt_dma_addr); - if (!pkt) { - dev_err(hdev->dev, - "Failed to allocate DMA memory for packet to CPU\n"); - return -ENOMEM; - } - - memcpy(pkt, msg, len); - - mutex_lock(&hdev->send_cpu_message_lock); - - if (hdev->disabled) - goto out; - - if (hdev->device_cpu_disabled) { - rc = -EIO; - goto out; - } - - rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_CPU_PQ, len, - pkt_dma_addr); - if (rc) { - dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); - goto out; - } - - rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence, - timeout, &tmp); - - hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_CPU_PQ); - - if (rc == -ETIMEDOUT) { - dev_err(hdev->dev, "Timeout while waiting for device CPU\n"); - hdev->device_cpu_disabled = true; - goto out; - } - - if (tmp == ARMCP_PACKET_FENCE_VAL) { - u32 ctl = le32_to_cpu(pkt->ctl); - - rc = (ctl & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT; - if (rc) { - dev_err(hdev->dev, - "F/W ERROR %d for CPU packet %d\n", - rc, (ctl & ARMCP_PKT_CTL_OPCODE_MASK) - >> ARMCP_PKT_CTL_OPCODE_SHIFT); - rc = -EINVAL; - } else if (result) { - *result = (long) le64_to_cpu(pkt->result); - } - } else { - dev_err(hdev->dev, "CPU packet wrong fence value\n"); - rc = -EINVAL; - } - -out: - mutex_unlock(&hdev->send_cpu_message_lock); - - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt); - - return rc; + return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len, + timeout, result); } int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) @@ -3306,33 +3189,16 @@ free_fence_ptr: int goya_test_cpu_queue(struct hl_device *hdev) { - struct armcp_packet test_pkt; - long result; - int rc; + struct goya_device *goya = hdev->asic_specific; - /* cpu_queues_enable flag is always checked in send cpu message */ + /* + * check capability here as send_cpu_message() won't update the result + * value if no capability + */ + if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) + return 0; - memset(&test_pkt, 0, sizeof(test_pkt)); - - test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << - ARMCP_PKT_CTL_OPCODE_SHIFT); - test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, - sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); - - if (!rc) { - if (result == ARMCP_PACKET_FENCE_VAL) - dev_info(hdev->dev, - "queue test on CPU queue succeeded\n"); - else - dev_err(hdev->dev, - "CPU queue test failed (0x%08lX)\n", result); - } else { - dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc); - } - - return rc; + return hl_fw_test_cpu_queue(hdev); } static int goya_test_queues(struct hl_device *hdev) @@ -3373,27 +3239,13 @@ static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr, static void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle) { - u64 kernel_addr; - - /* roundup to CPU_PKT_SIZE */ - size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK; - - kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size); - - *dma_handle = hdev->cpu_accessible_dma_address + - (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem); - - return (void *) (uintptr_t) kernel_addr; + return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); } static void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) { - /* roundup to CPU_PKT_SIZE */ - size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK; - - gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr, - size); + hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); } static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sg, @@ -5032,72 +4884,26 @@ static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, int goya_send_heartbeat(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; - struct armcp_packet hb_pkt; - long result; - int rc; if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - memset(&hb_pkt, 0, sizeof(hb_pkt)); - - hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << - ARMCP_PKT_CTL_OPCODE_SHIFT); - hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, - sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); - - if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) - rc = -EIO; - - return rc; + return hl_fw_send_heartbeat(hdev); } static int goya_armcp_info_get(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; struct asic_fixed_properties *prop = &hdev->asic_prop; - struct armcp_packet pkt; - void *armcp_info_cpu_addr; - dma_addr_t armcp_info_dma_addr; u64 dram_size; - long result; int rc; if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - armcp_info_cpu_addr = - hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, - sizeof(struct armcp_info), &armcp_info_dma_addr); - if (!armcp_info_cpu_addr) { - dev_err(hdev->dev, - "Failed to allocate DMA memory for ArmCP info packet\n"); - return -ENOMEM; - } - - memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info)); - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.addr = cpu_to_le64(armcp_info_dma_addr + - prop->host_phys_base_address); - pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info)); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - GOYA_ARMCP_INFO_TIMEOUT, &result); - - if (rc) { - dev_err(hdev->dev, - "Failed to send armcp info pkt, error %d\n", rc); - goto out; - } - - memcpy(&prop->armcp_info, armcp_info_cpu_addr, - sizeof(prop->armcp_info)); + rc = hl_fw_armcp_info_get(hdev); + if (rc) + return rc; dram_size = le64_to_cpu(prop->armcp_info.dram_size); if (dram_size) { @@ -5113,19 +4919,7 @@ static int goya_armcp_info_get(struct hl_device *hdev) prop->dram_end_address = prop->dram_base_address + dram_size; } - rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors); - if (rc) { - dev_err(hdev->dev, - "Failed to build hwmon channel info, error %d\n", rc); - rc = -EFAULT; - goto out; - } - -out: - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, - sizeof(struct armcp_info), armcp_info_cpu_addr); - - return rc; + return 0; } static void goya_init_clock_gating(struct hl_device *hdev) @@ -5214,52 +5008,11 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) { struct goya_device *goya = hdev->asic_specific; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct armcp_packet pkt; - void *eeprom_info_cpu_addr; - dma_addr_t eeprom_info_dma_addr; - long result; - int rc; if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - eeprom_info_cpu_addr = - hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, - max_size, &eeprom_info_dma_addr); - if (!eeprom_info_cpu_addr) { - dev_err(hdev->dev, - "Failed to allocate DMA memory for EEPROM info packet\n"); - return -ENOMEM; - } - - memset(eeprom_info_cpu_addr, 0, max_size); - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.addr = cpu_to_le64(eeprom_info_dma_addr + - prop->host_phys_base_address); - pkt.data_max_size = cpu_to_le32(max_size); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - GOYA_ARMCP_EEPROM_TIMEOUT, &result); - - if (rc) { - dev_err(hdev->dev, - "Failed to send armcp EEPROM pkt, error %d\n", rc); - goto out; - } - - /* result contains the actual size */ - memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size)); - -out: - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size, - eeprom_info_cpu_addr); - - return rc; + return hl_fw_get_eeprom_data(hdev, data, max_size); } static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev) diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 830551b6b062..6fdc76b5bde0 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -49,9 +49,6 @@ #define MAX_POWER_DEFAULT 200000 /* 200W */ -#define GOYA_ARMCP_INFO_TIMEOUT 10000000 /* 10s */ -#define GOYA_ARMCP_EEPROM_TIMEOUT 10000000 /* 10s */ - #define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */ /* DRAM Memory Map */ @@ -142,13 +139,6 @@ #define HW_CAP_GOLDEN 0x00000400 #define HW_CAP_TPC 0x00000800 -#define CPU_PKT_SHIFT 5 -#define CPU_PKT_SIZE (1 << CPU_PKT_SHIFT) -#define CPU_PKT_MASK (~((1 << CPU_PKT_SHIFT) - 1)) -#define CPU_MAX_PKTS_IN_CB 32 -#define CPU_CB_SIZE (CPU_PKT_SIZE * CPU_MAX_PKTS_IN_CB) -#define CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * CPU_CB_SIZE) - enum goya_fw_component { FW_COMP_UBOOT, FW_COMP_PREBOOT @@ -192,7 +182,6 @@ void goya_init_security(struct hl_device *hdev); u64 goya_get_max_power(struct hl_device *hdev); void goya_set_max_power(struct hl_device *hdev, u64 value); -int goya_send_pci_access_msg(struct hl_device *hdev, u32 opcode); void goya_late_fini(struct hl_device *hdev); int goya_suspend(struct hl_device *hdev); int goya_resume(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index a8ee52c880cd..1445ba1cafd4 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -33,6 +33,9 @@ #define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */ +#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ + #define HL_MAX_QUEUES 128 #define HL_MAX_JOBS_PER_CS 64 @@ -1351,6 +1354,20 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size); void hl_mmu_swap_out(struct hl_ctx *ctx); void hl_mmu_swap_in(struct hl_ctx *ctx); +int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name, + void __iomem *dst); +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode); +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, + u16 len, u32 timeout, long *result); +int hl_fw_test_cpu_queue(struct hl_device *hdev); +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle); +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, + void *vaddr); +int hl_fw_send_heartbeat(struct hl_device *hdev); +int hl_fw_armcp_info_get(struct hl_device *hdev); +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); + long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr); diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h index 9dddb917e72c..ccb82390241e 100644 --- a/drivers/misc/habanalabs/include/armcp_if.h +++ b/drivers/misc/habanalabs/include/armcp_if.h @@ -302,6 +302,14 @@ enum armcp_pwm_attributes { armcp_pwm_enable }; +#define HL_CPU_PKT_SHIFT 5 +#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT) +#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1)) +#define HL_CPU_MAX_PKTS_IN_CB 32 +#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \ + HL_CPU_MAX_PKTS_IN_CB) +#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE) + /* Event Queue Packets */ struct eq_generic_event { From c535bfdd0f86aeed165d4f5aba187570f42d785f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 4 Mar 2019 15:51:30 +0200 Subject: [PATCH 05/31] habanalabs: use EQ MSI/X ID per chip The Event Queue MSI/X ID is different per ASIC. This patch renames the current define to have the GOYA_ prefix to mark it only for Goya. It also moves it from the common armcp_if.h file to the ASIC specific goya_fw_if.h file. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 8 ++++---- drivers/misc/habanalabs/include/armcp_if.h | 2 -- drivers/misc/habanalabs/include/goya/goya_fw_if.h | 2 ++ 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 30f83521de90..78eacd14c9ac 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2204,10 +2204,10 @@ static int goya_enable_msix(struct hl_device *hdev) } } - irq = pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX); + irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX); rc = request_irq(irq, hl_irq_handler_eq, 0, - goya_irq_name[EVENT_QUEUE_MSIX_IDX], + goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX], &hdev->event_queue); if (rc) { dev_err(hdev->dev, "Failed to request IRQ %d", irq); @@ -2238,7 +2238,7 @@ static void goya_sync_irqs(struct hl_device *hdev) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) synchronize_irq(pci_irq_vector(hdev->pdev, i)); - synchronize_irq(pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX)); + synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX)); } static void goya_disable_msix(struct hl_device *hdev) @@ -2251,7 +2251,7 @@ static void goya_disable_msix(struct hl_device *hdev) goya_sync_irqs(hdev); - irq = pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX); + irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX); free_irq(irq, &hdev->event_queue); for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) { diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h index ccb82390241e..c8f28cadc335 100644 --- a/drivers/misc/habanalabs/include/armcp_if.h +++ b/drivers/misc/habanalabs/include/armcp_if.h @@ -32,8 +32,6 @@ struct hl_eq_entry { #define EQ_CTL_EVENT_TYPE_SHIFT 16 #define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000 -#define EVENT_QUEUE_MSIX_IDX 5 - enum pq_init_status { PQ_INIT_STATUS_NA = 0, PQ_INIT_STATUS_READY_FOR_CP, diff --git a/drivers/misc/habanalabs/include/goya/goya_fw_if.h b/drivers/misc/habanalabs/include/goya/goya_fw_if.h index a9920cb4a07b..0fa80fe9f6cc 100644 --- a/drivers/misc/habanalabs/include/goya/goya_fw_if.h +++ b/drivers/misc/habanalabs/include/goya/goya_fw_if.h @@ -8,6 +8,8 @@ #ifndef GOYA_FW_IF_H #define GOYA_FW_IF_H +#define GOYA_EVENT_QUEUE_MSIX_IDX 5 + #define CPU_BOOT_ADDR 0x7FF8040000ull #define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */ From e0a29952c52787f8e4a4fc3046f3e47916ffa239 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 28 Feb 2019 10:19:41 +0200 Subject: [PATCH 06/31] habanalabs: remove unused defines This patch removes some old defines which are not in use anymore. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goyaP.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 6fdc76b5bde0..ae5e41bc8f7f 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -54,11 +54,9 @@ /* DRAM Memory Map */ #define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */ -#define MMU_PAGE_TABLES_SIZE 0x0DE00000 /* 222MB */ +#define MMU_PAGE_TABLES_SIZE 0x0FC00000 /* 252MB */ #define MMU_DRAM_DEFAULT_PAGE_SIZE 0x00200000 /* 2MB */ #define MMU_CACHE_MNG_SIZE 0x00001000 /* 4KB */ -#define CPU_PQ_PKT_SIZE 0x00001000 /* 4KB */ -#define CPU_PQ_DATA_SIZE 0x01FFE000 /* 32MB - 8KB */ #define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE #define MMU_PAGE_TABLES_ADDR (CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE) @@ -66,13 +64,13 @@ MMU_PAGE_TABLES_SIZE) #define MMU_CACHE_MNG_ADDR (MMU_DRAM_DEFAULT_PAGE_ADDR + \ MMU_DRAM_DEFAULT_PAGE_SIZE) -#define CPU_PQ_PKT_ADDR (MMU_CACHE_MNG_ADDR + \ +#define DRAM_KMD_END_ADDR (MMU_CACHE_MNG_ADDR + \ MMU_CACHE_MNG_SIZE) -#define CPU_PQ_DATA_ADDR (CPU_PQ_PKT_ADDR + CPU_PQ_PKT_SIZE) -#define DRAM_BASE_ADDR_USER (CPU_PQ_DATA_ADDR + CPU_PQ_DATA_SIZE) -#if (DRAM_BASE_ADDR_USER != 0x20000000) -#error "KMD must reserve 512MB" +#define DRAM_BASE_ADDR_USER 0x20000000 + +#if (DRAM_KMD_END_ADDR > DRAM_BASE_ADDR_USER) +#error "KMD must reserve no more than 512MB" #endif /* From 680cb3991c9ed72cefffc49e222a8c8d34ff5e3e Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 5 Mar 2019 13:53:22 +0200 Subject: [PATCH 07/31] habanalabs: ratelimit warnings at start of IOCTLs At the start of some IOCTLs we check if the device is disabled or in reset. If it is, we return -EBUSY and print a message to kernel log. Because these IOCTLs can be called at very high frequency, use ratelimit to avoid spamming the kernel log. Also use the same type of message - dev_warn - in all the relevant IOCTLs. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/command_submission.c | 2 +- drivers/misc/habanalabs/habanalabs_ioctl.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index 19c84214a7ea..f908643f871f 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -604,7 +604,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) bool need_soft_reset = false; if (hl_device_disabled_or_in_reset(hdev)) { - dev_warn(hdev->dev, + dev_warn_ratelimited(hdev->dev, "Device is %s. Can't submit new CS\n", atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); rc = -EBUSY; diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index 2c2739a3c5ec..19b96afc0308 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -106,7 +106,7 @@ static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) int rc; if (hl_device_disabled_or_in_reset(hdev)) { - dev_err(hdev->dev, + dev_warn_ratelimited(hdev->dev, "Device is disabled or in reset. Can't execute INFO IOCTL\n"); return -EBUSY; } From b6f897d75d651f86a3988a602e767696f9e08de6 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 5 Mar 2019 16:48:42 +0200 Subject: [PATCH 08/31] habanalabs: Move PCI code into common file Move duplicated PCI-related code from ASIC-specific files into the common pci.c file. Signed-off-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/Makefile | 2 +- drivers/misc/habanalabs/goya/goya.c | 272 +------------ drivers/misc/habanalabs/habanalabs.h | 20 + .../include/goya/asic_reg/goya_masks.h | 12 - .../include/hw_ip/pci/pci_general.h | 23 ++ drivers/misc/habanalabs/pci.c | 370 ++++++++++++++++++ 6 files changed, 434 insertions(+), 265 deletions(-) create mode 100644 drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h create mode 100644 drivers/misc/habanalabs/pci.c diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile index 598740d235f8..f8e85243d672 100644 --- a/drivers/misc/habanalabs/Makefile +++ b/drivers/misc/habanalabs/Makefile @@ -6,7 +6,7 @@ obj-m := habanalabs.o habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \ command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \ - command_submission.o mmu.o firmware_if.o + command_submission.o mmu.o firmware_if.o pci.o habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 78eacd14c9ac..5ce3a341ba5f 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -363,12 +363,13 @@ static void goya_get_fixed_properties(struct hl_device *hdev) prop->cfg_size = CFG_SIZE; prop->max_asid = MAX_ASID; prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE; + prop->high_pll = PLL_HIGH_DEFAULT; prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT; prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE; prop->max_power_default = MAX_POWER_DEFAULT; prop->tpc_enabled_mask = TPC_ENABLED_MASK; - - prop->high_pll = PLL_HIGH_DEFAULT; + prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; + prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; } /* @@ -382,159 +383,18 @@ static void goya_get_fixed_properties(struct hl_device *hdev) */ static int goya_pci_bars_map(struct hl_device *hdev) { - struct pci_dev *pdev = hdev->pdev; + static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"}; + bool is_wc[3] = {false, false, true}; int rc; - rc = pci_request_regions(pdev, HL_NAME); - if (rc) { - dev_err(hdev->dev, "Cannot obtain PCI resources\n"); + rc = hl_pci_bars_map(hdev, name, is_wc); + if (rc) return rc; - } - - hdev->pcie_bar[SRAM_CFG_BAR_ID] = - pci_ioremap_bar(pdev, SRAM_CFG_BAR_ID); - if (!hdev->pcie_bar[SRAM_CFG_BAR_ID]) { - dev_err(hdev->dev, "pci_ioremap_bar failed for CFG\n"); - rc = -ENODEV; - goto err_release_regions; - } - - hdev->pcie_bar[MSIX_BAR_ID] = pci_ioremap_bar(pdev, MSIX_BAR_ID); - if (!hdev->pcie_bar[MSIX_BAR_ID]) { - dev_err(hdev->dev, "pci_ioremap_bar failed for MSIX\n"); - rc = -ENODEV; - goto err_unmap_sram_cfg; - } - - hdev->pcie_bar[DDR_BAR_ID] = pci_ioremap_wc_bar(pdev, DDR_BAR_ID); - if (!hdev->pcie_bar[DDR_BAR_ID]) { - dev_err(hdev->dev, "pci_ioremap_bar failed for DDR\n"); - rc = -ENODEV; - goto err_unmap_msix; - } hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + - (CFG_BASE - SRAM_BASE_ADDR); + (CFG_BASE - SRAM_BASE_ADDR); return 0; - -err_unmap_msix: - iounmap(hdev->pcie_bar[MSIX_BAR_ID]); -err_unmap_sram_cfg: - iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]); -err_release_regions: - pci_release_regions(pdev); - - return rc; -} - -/* - * goya_pci_bars_unmap - Unmap PCI BARS of Goya device - * - * @hdev: pointer to hl_device structure - * - * Release all PCI BARS and unmap their virtual addresses - * - */ -static void goya_pci_bars_unmap(struct hl_device *hdev) -{ - struct pci_dev *pdev = hdev->pdev; - - iounmap(hdev->pcie_bar[DDR_BAR_ID]); - iounmap(hdev->pcie_bar[MSIX_BAR_ID]); - iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]); - pci_release_regions(pdev); -} - -/* - * goya_elbi_write - Write through the ELBI interface - * - * @hdev: pointer to hl_device structure - * - * return 0 on success, -1 on failure - * - */ -static int goya_elbi_write(struct hl_device *hdev, u64 addr, u32 data) -{ - struct pci_dev *pdev = hdev->pdev; - ktime_t timeout; - u32 val; - - /* Clear previous status */ - pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0); - - pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr); - pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); - pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, - PCI_CONFIG_ELBI_CTRL_WRITE); - - timeout = ktime_add_ms(ktime_get(), 10); - for (;;) { - pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val); - if (val & PCI_CONFIG_ELBI_STS_MASK) - break; - if (ktime_compare(ktime_get(), timeout) > 0) { - pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, - &val); - break; - } - usleep_range(300, 500); - } - - if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) - return 0; - - if (val & PCI_CONFIG_ELBI_STS_ERR) { - dev_err(hdev->dev, "Error writing to ELBI\n"); - return -EIO; - } - - if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { - dev_err(hdev->dev, "ELBI write didn't finish in time\n"); - return -EIO; - } - - dev_err(hdev->dev, "ELBI write has undefined bits in status\n"); - return -EIO; -} - -/* - * goya_iatu_write - iatu write routine - * - * @hdev: pointer to hl_device structure - * - */ -static int goya_iatu_write(struct hl_device *hdev, u32 addr, u32 data) -{ - u32 dbi_offset; - int rc; - - dbi_offset = addr & 0xFFF; - - rc = goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0x00300000); - rc |= goya_elbi_write(hdev, mmPCIE_DBI_BASE + dbi_offset, data); - - if (rc) - return -EIO; - - return 0; -} - -static void goya_reset_link_through_bridge(struct hl_device *hdev) -{ - struct pci_dev *pdev = hdev->pdev; - struct pci_dev *parent_port; - u16 val; - - parent_port = pdev->bus->self; - pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val); - val |= PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); - ssleep(1); - - val &= ~(PCI_BRIDGE_CTL_BUS_RESET); - pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); - ssleep(3); } /* @@ -556,20 +416,9 @@ static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr) return 0; /* Inbound Region 1 - Bar 4 - Point to DDR */ - rc = goya_iatu_write(hdev, 0x314, lower_32_bits(addr)); - rc |= goya_iatu_write(hdev, 0x318, upper_32_bits(addr)); - rc |= goya_iatu_write(hdev, 0x300, 0); - /* Enable + Bar match + match enable + Bar 4 */ - rc |= goya_iatu_write(hdev, 0x304, 0xC0080400); - - /* Return the DBI window to the default location */ - rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0); - rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0); - - if (rc) { - dev_err(hdev->dev, "failed to map DDR bar to 0x%08llx\n", addr); - return -EIO; - } + rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr); + if (rc) + return rc; if (goya) goya->ddr_bar_cur_addr = addr; @@ -587,40 +436,8 @@ static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr) */ static int goya_init_iatu(struct hl_device *hdev) { - int rc; - - /* Inbound Region 0 - Bar 0 - Point to SRAM_BASE_ADDR */ - rc = goya_iatu_write(hdev, 0x114, lower_32_bits(SRAM_BASE_ADDR)); - rc |= goya_iatu_write(hdev, 0x118, upper_32_bits(SRAM_BASE_ADDR)); - rc |= goya_iatu_write(hdev, 0x100, 0); - /* Enable + Bar match + match enable */ - rc |= goya_iatu_write(hdev, 0x104, 0xC0080000); - - /* Inbound Region 1 - Bar 4 - Point to DDR */ - rc |= goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE); - - /* Outbound Region 0 - Point to Host */ - rc |= goya_iatu_write(hdev, 0x008, lower_32_bits(HOST_PHYS_BASE)); - rc |= goya_iatu_write(hdev, 0x00C, upper_32_bits(HOST_PHYS_BASE)); - rc |= goya_iatu_write(hdev, 0x010, - lower_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1)); - rc |= goya_iatu_write(hdev, 0x014, 0); - rc |= goya_iatu_write(hdev, 0x018, 0); - rc |= goya_iatu_write(hdev, 0x020, - upper_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1)); - /* Increase region size */ - rc |= goya_iatu_write(hdev, 0x000, 0x00002000); - /* Enable */ - rc |= goya_iatu_write(hdev, 0x004, 0x80000000); - - /* Return the DBI window to the default location */ - rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0); - rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0); - - if (rc) - return -EIO; - - return 0; + return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE, + HOST_PHYS_SIZE); } /* @@ -666,52 +483,9 @@ static int goya_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); - /* set DMA mask for GOYA */ - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); - if (rc) { - dev_warn(hdev->dev, "Unable to set pci dma mask to 39 bits\n"); - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) { - dev_err(hdev->dev, - "Unable to set pci dma mask to 32 bits\n"); - return rc; - } - } - - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); - if (rc) { - dev_warn(hdev->dev, - "Unable to set pci consistent dma mask to 39 bits\n"); - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) { - dev_err(hdev->dev, - "Unable to set pci consistent dma mask to 32 bits\n"); - return rc; - } - } - - if (hdev->reset_pcilink) - goya_reset_link_through_bridge(hdev); - - rc = pci_enable_device_mem(pdev); - if (rc) { - dev_err(hdev->dev, "can't enable PCI device\n"); + rc = hl_pci_init(hdev); + if (rc) return rc; - } - - pci_set_master(pdev); - - rc = goya_init_iatu(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to initialize iATU\n"); - goto disable_device; - } - - rc = goya_pci_bars_map(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to initialize PCI BARS\n"); - goto disable_device; - } if (!hdev->pldm) { val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); @@ -721,12 +495,6 @@ static int goya_early_init(struct hl_device *hdev) } return 0; - -disable_device: - pci_clear_master(pdev); - pci_disable_device(pdev); - - return rc; } /* @@ -739,10 +507,7 @@ disable_device: */ static int goya_early_fini(struct hl_device *hdev) { - goya_pci_bars_unmap(hdev); - - pci_clear_master(hdev->pdev); - pci_disable_device(hdev->pdev); + hl_pci_fini(hdev); return 0; } @@ -5071,7 +4836,10 @@ static const struct hl_asic_funcs goya_funcs = { .get_pci_id = goya_get_pci_id, .get_eeprom_data = goya_get_eeprom_data, .send_cpu_message = goya_send_cpu_message, - .get_hw_state = goya_get_hw_state + .get_hw_state = goya_get_hw_state, + .pci_bars_map = goya_pci_bars_map, + .set_dram_bar_base = goya_set_ddr_bar_base, + .init_iatu = goya_init_iatu }; /* diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 1445ba1cafd4..ed3649d38ff8 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -148,6 +148,8 @@ enum hl_device_hw_state { * mapping DRAM memory. * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page * fault. + * @pcie_dbi_base_address: Base address of the PCIE_DBI block. + * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register. * @mmu_pgt_addr: base physical address in DRAM of MMU page tables. * @mmu_dram_default_page_addr: DRAM default page physical address. * @mmu_pgt_size: MMU page tables total size. @@ -189,6 +191,8 @@ struct asic_fixed_properties { u64 va_space_dram_start_address; u64 va_space_dram_end_address; u64 dram_size_for_default_page_mapping; + u64 pcie_dbi_base_address; + u64 pcie_aux_dbi_reg_addr; u64 mmu_pgt_addr; u64 mmu_dram_default_page_addr; u32 mmu_pgt_size; @@ -485,6 +489,9 @@ enum hl_pll_frequency { * @get_eeprom_data: retrieve EEPROM data from F/W. * @send_cpu_message: send buffer to ArmCP. * @get_hw_state: retrieve the H/W state + * @pci_bars_map: Map PCI BARs. + * @set_dram_bar_base: Set DRAM BAR to map specific device address. + * @init_iatu: Initialize the iATU unit inside the PCI controller. */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -558,6 +565,9 @@ struct hl_asic_funcs { int (*send_cpu_message)(struct hl_device *hdev, u32 *msg, u16 len, u32 timeout, long *result); enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev); + int (*pci_bars_map)(struct hl_device *hdev); + int (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); + int (*init_iatu)(struct hl_device *hdev); }; @@ -1368,6 +1378,16 @@ int hl_fw_send_heartbeat(struct hl_device *hdev); int hl_fw_armcp_info_get(struct hl_device *hdev); int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); +int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], + bool is_wc[3]); +int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data); +int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar, + u64 addr); +int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address, + u64 dram_base_address, u64 host_phys_size); +int hl_pci_init(struct hl_device *hdev); +void hl_pci_fini(struct hl_device *hdev); + long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr); diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h index a161ecfe74de..8618891d5afa 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h @@ -189,18 +189,6 @@ 1 << CPU_CA53_CFG_ARM_RST_CONTROL_NL2RESET_SHIFT |\ 1 << CPU_CA53_CFG_ARM_RST_CONTROL_NMBISTRESET_SHIFT) -/* PCI CONFIGURATION SPACE */ -#define mmPCI_CONFIG_ELBI_ADDR 0xFF0 -#define mmPCI_CONFIG_ELBI_DATA 0xFF4 -#define mmPCI_CONFIG_ELBI_CTRL 0xFF8 -#define PCI_CONFIG_ELBI_CTRL_WRITE (1 << 31) - -#define mmPCI_CONFIG_ELBI_STS 0xFFC -#define PCI_CONFIG_ELBI_STS_ERR (1 << 30) -#define PCI_CONFIG_ELBI_STS_DONE (1 << 31) -#define PCI_CONFIG_ELBI_STS_MASK (PCI_CONFIG_ELBI_STS_ERR | \ - PCI_CONFIG_ELBI_STS_DONE) - #define GOYA_IRQ_HBW_ID_MASK 0x1FFF #define GOYA_IRQ_HBW_ID_SHIFT 0 #define GOYA_IRQ_HBW_INTERNAL_ID_MASK 0xE000 diff --git a/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h b/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h new file mode 100644 index 000000000000..d232081d4e0f --- /dev/null +++ b/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef INCLUDE_PCI_GENERAL_H_ +#define INCLUDE_PCI_GENERAL_H_ + +/* PCI CONFIGURATION SPACE */ +#define mmPCI_CONFIG_ELBI_ADDR 0xFF0 +#define mmPCI_CONFIG_ELBI_DATA 0xFF4 +#define mmPCI_CONFIG_ELBI_CTRL 0xFF8 +#define PCI_CONFIG_ELBI_CTRL_WRITE (1 << 31) + +#define mmPCI_CONFIG_ELBI_STS 0xFFC +#define PCI_CONFIG_ELBI_STS_ERR (1 << 30) +#define PCI_CONFIG_ELBI_STS_DONE (1 << 31) +#define PCI_CONFIG_ELBI_STS_MASK (PCI_CONFIG_ELBI_STS_ERR | \ + PCI_CONFIG_ELBI_STS_DONE) + +#endif /* INCLUDE_PCI_GENERAL_H_ */ diff --git a/drivers/misc/habanalabs/pci.c b/drivers/misc/habanalabs/pci.c new file mode 100644 index 000000000000..822ac110b997 --- /dev/null +++ b/drivers/misc/habanalabs/pci.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" +#include "include/hw_ip/pci/pci_general.h" + +#include + +/** + * hl_pci_bars_map() - Map PCI BARs. + * @hdev: Pointer to hl_device structure. + * @bar_name: Array of BAR names. + * @is_wc: Array with flag per BAR whether a write-combined mapping is needed. + * + * Request PCI regions and map them to kernel virtual addresses. + * + * Return: 0 on success, non-zero for failure. + */ +int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], + bool is_wc[3]) +{ + struct pci_dev *pdev = hdev->pdev; + int rc, i, bar; + + rc = pci_request_regions(pdev, HL_NAME); + if (rc) { + dev_err(hdev->dev, "Cannot obtain PCI resources\n"); + return rc; + } + + for (i = 0 ; i < 3 ; i++) { + bar = i * 2; /* 64-bit BARs */ + hdev->pcie_bar[bar] = is_wc[i] ? + pci_ioremap_wc_bar(pdev, bar) : + pci_ioremap_bar(pdev, bar); + if (!hdev->pcie_bar[bar]) { + dev_err(hdev->dev, "pci_ioremap%s_bar failed for %s\n", + is_wc[i] ? "_wc" : "", name[i]); + rc = -ENODEV; + goto err; + } + } + + return 0; + +err: + for (i = 2 ; i >= 0 ; i--) { + bar = i * 2; /* 64-bit BARs */ + if (hdev->pcie_bar[bar]) + iounmap(hdev->pcie_bar[bar]); + } + + pci_release_regions(pdev); + + return rc; +} + +/* + * hl_pci_bars_unmap() - Unmap PCI BARS. + * @hdev: Pointer to hl_device structure. + * + * Release all PCI BARs and unmap their virtual addresses. + */ +static void hl_pci_bars_unmap(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + int i, bar; + + for (i = 2 ; i >= 0 ; i--) { + bar = i * 2; /* 64-bit BARs */ + iounmap(hdev->pcie_bar[bar]); + } + + pci_release_regions(pdev); +} + +/* + * hl_pci_elbi_write() - Write through the ELBI interface. + * @hdev: Pointer to hl_device structure. + * + * Return: 0 on success, negative value for failure. + */ +static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data) +{ + struct pci_dev *pdev = hdev->pdev; + ktime_t timeout; + u32 val; + + /* Clear previous status */ + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0); + + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, + PCI_CONFIG_ELBI_CTRL_WRITE); + + timeout = ktime_add_ms(ktime_get(), 10); + for (;;) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val); + if (val & PCI_CONFIG_ELBI_STS_MASK) + break; + if (ktime_compare(ktime_get(), timeout) > 0) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, + &val); + break; + } + + usleep_range(300, 500); + } + + if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) + return 0; + + if (val & PCI_CONFIG_ELBI_STS_ERR) { + dev_err(hdev->dev, "Error writing to ELBI\n"); + return -EIO; + } + + if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { + dev_err(hdev->dev, "ELBI write didn't finish in time\n"); + return -EIO; + } + + dev_err(hdev->dev, "ELBI write has undefined bits in status\n"); + return -EIO; +} + +/** + * hl_pci_iatu_write() - iatu write routine. + * @hdev: Pointer to hl_device structure. + * + * Return: 0 on success, negative value for failure. + */ +int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 dbi_offset; + int rc; + + dbi_offset = addr & 0xFFF; + + rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); + rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, + data); + + if (rc) + return -EIO; + + return 0; +} + +/* + * hl_pci_reset_link_through_bridge() - Reset PCI link. + * @hdev: Pointer to hl_device structure. + */ +static void hl_pci_reset_link_through_bridge(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + struct pci_dev *parent_port; + u16 val; + + parent_port = pdev->bus->self; + pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val); + val |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); + ssleep(1); + + val &= ~(PCI_BRIDGE_CTL_BUS_RESET); + pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); + ssleep(3); +} + +/** + * hl_pci_set_dram_bar_base() - Set DDR BAR to map specific device address. + * @hdev: Pointer to hl_device structure. + * @inbound_region: Inbound region number. + * @bar: PCI BAR number. + * @addr: Address in DRAM. Must be aligned to DRAM bar size. + * + * Configure the iATU so that the DRAM bar will start at the specified address. + * + * Return: 0 on success, negative value for failure. + */ +int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar, + u64 addr) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 offset; + int rc; + + switch (inbound_region) { + case 0: + offset = 0x100; + break; + case 1: + offset = 0x300; + break; + case 2: + offset = 0x500; + break; + default: + dev_err(hdev->dev, "Invalid inbound region %d\n", + inbound_region); + return -EINVAL; + } + + if (bar != 0 && bar != 2 && bar != 4) { + dev_err(hdev->dev, "Invalid PCI BAR %d\n", bar); + return -EINVAL; + } + + /* Point to the specified address */ + rc = hl_pci_iatu_write(hdev, offset + 0x14, lower_32_bits(addr)); + rc |= hl_pci_iatu_write(hdev, offset + 0x18, upper_32_bits(addr)); + rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0); + /* Enable + BAR match + match enable + BAR number */ + rc |= hl_pci_iatu_write(hdev, offset + 0x4, 0xC0080000 | (bar << 8)); + + /* Return the DBI window to the default location */ + rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); + rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + + if (rc) + dev_err(hdev->dev, "failed to map DRAM bar to 0x%08llx\n", + addr); + + return rc; +} + +/** + * hl_pci_init_iatu() - Initialize the iATU unit inside the PCI controller. + * @hdev: Pointer to hl_device structure. + * @sram_base_address: SRAM base address. + * @dram_base_address: DRAM base address. + * @host_phys_size: Size of host memory for device transactions. + * + * This is needed in case the firmware doesn't initialize the iATU. + * + * Return: 0 on success, negative value for failure. + */ +int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address, + u64 dram_base_address, u64 host_phys_size) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 host_phys_end_addr; + int rc = 0; + + /* Inbound Region 0 - Bar 0 - Point to SRAM base address */ + rc = hl_pci_iatu_write(hdev, 0x114, lower_32_bits(sram_base_address)); + rc |= hl_pci_iatu_write(hdev, 0x118, upper_32_bits(sram_base_address)); + rc |= hl_pci_iatu_write(hdev, 0x100, 0); + /* Enable + Bar match + match enable */ + rc |= hl_pci_iatu_write(hdev, 0x104, 0xC0080000); + + /* Point to DRAM */ + if (!hdev->asic_funcs->set_dram_bar_base) + return -EINVAL; + rc |= hdev->asic_funcs->set_dram_bar_base(hdev, dram_base_address); + + /* Outbound Region 0 - Point to Host */ + host_phys_end_addr = prop->host_phys_base_address + host_phys_size - 1; + rc |= hl_pci_iatu_write(hdev, 0x008, + lower_32_bits(prop->host_phys_base_address)); + rc |= hl_pci_iatu_write(hdev, 0x00C, + upper_32_bits(prop->host_phys_base_address)); + rc |= hl_pci_iatu_write(hdev, 0x010, lower_32_bits(host_phys_end_addr)); + rc |= hl_pci_iatu_write(hdev, 0x014, 0); + rc |= hl_pci_iatu_write(hdev, 0x018, 0); + rc |= hl_pci_iatu_write(hdev, 0x020, upper_32_bits(host_phys_end_addr)); + /* Increase region size */ + rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000); + /* Enable */ + rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000); + + /* Return the DBI window to the default location */ + rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); + rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + + if (rc) + return -EIO; + + return 0; +} + +/** + * hl_pci_init() - PCI initialization code. + * @hdev: Pointer to hl_device structure. + * + * Set DMA masks, initialize the PCI controller and map the PCI BARs. + * + * Return: 0 on success, non-zero for failure. + */ +int hl_pci_init(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + int rc; + + /* set DMA mask */ + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); + if (rc) { + dev_warn(hdev->dev, "Unable to set pci dma mask to 39 bits\n"); + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(hdev->dev, + "Unable to set pci dma mask to 32 bits\n"); + return rc; + } + } + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); + if (rc) { + dev_warn(hdev->dev, + "Unable to set pci consistent dma mask to 39 bits\n"); + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(hdev->dev, + "Unable to set pci consistent dma mask to 32 bits\n"); + return rc; + } + } + + if (hdev->reset_pcilink) + hl_pci_reset_link_through_bridge(hdev); + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(hdev->dev, "can't enable PCI device\n"); + return rc; + } + + pci_set_master(pdev); + + rc = hdev->asic_funcs->init_iatu(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize iATU\n"); + goto disable_device; + } + + rc = hdev->asic_funcs->pci_bars_map(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize PCI BARs\n"); + goto disable_device; + } + + return 0; + +disable_device: + pci_clear_master(pdev); + pci_disable_device(pdev); + + return rc; +} + +/** + * hl_fw_fini() - PCI finalization code. + * @hdev: Pointer to hl_device structure + * + * Unmap PCI bars and disable PCI device. + */ +void hl_pci_fini(struct hl_device *hdev) +{ + hl_pci_bars_unmap(hdev); + + pci_clear_master(hdev->pdev); + pci_disable_device(hdev->pdev); +} From 393e5b55e2228fdb8382cca78b2b1dcae2b17590 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 6 Mar 2019 14:30:26 +0200 Subject: [PATCH 09/31] habanalabs: Remove unneeded function pointers Remove pointers to ASIC-specific functions and instead call the functions explicitly as they are not accessed from outside the ASIC-specific files. Signed-off-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 12 +++--------- drivers/misc/habanalabs/goya/goyaP.h | 4 +--- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 5ce3a341ba5f..3b34627500a2 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -298,7 +298,6 @@ static u32 goya_all_events[] = { GOYA_ASYNC_EVENT_ID_DMA_BM_CH4 }; -static int goya_armcp_info_get(struct hl_device *hdev); static void goya_mmu_prepare(struct hl_device *hdev, u32 asid); static int goya_mmu_clear_pgt_range(struct hl_device *hdev); static int goya_mmu_set_dram_default_page(struct hl_device *hdev); @@ -538,10 +537,9 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev) static int goya_late_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct goya_device *goya = hdev->asic_specific; int rc; - rc = goya->armcp_info_get(hdev); + rc = goya_armcp_info_get(hdev); if (rc) { dev_err(hdev->dev, "Failed to get armcp info\n"); return rc; @@ -628,9 +626,6 @@ static int goya_sw_init(struct hl_device *hdev) if (!goya) return -ENOMEM; - goya->test_cpu_queue = goya_test_cpu_queue; - goya->armcp_info_get = goya_armcp_info_get; - /* according to goya_init_iatu */ goya->ddr_bar_cur_addr = DRAM_PHYS_BASE; @@ -2968,7 +2963,6 @@ int goya_test_cpu_queue(struct hl_device *hdev) static int goya_test_queues(struct hl_device *hdev) { - struct goya_device *goya = hdev->asic_specific; int i, rc, ret_val = 0; for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { @@ -2978,7 +2972,7 @@ static int goya_test_queues(struct hl_device *hdev) } if (hdev->cpu_queues_enable) { - rc = goya->test_cpu_queue(hdev); + rc = goya_test_cpu_queue(hdev); if (rc) ret_val = -EINVAL; } @@ -4656,7 +4650,7 @@ int goya_send_heartbeat(struct hl_device *hdev) return hl_fw_send_heartbeat(hdev); } -static int goya_armcp_info_get(struct hl_device *hdev) +int goya_armcp_info_get(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; struct asic_fixed_properties *prop = &hdev->asic_prop; diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index ae5e41bc8f7f..b99d92f197eb 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -143,9 +143,6 @@ enum goya_fw_component { }; struct goya_device { - int (*test_cpu_queue)(struct hl_device *hdev); - int (*armcp_info_get)(struct hl_device *hdev); - /* TODO: remove hw_queues_lock after moving to scheduler code */ spinlock_t hw_queues_lock; @@ -176,6 +173,7 @@ void goya_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state); void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp); +int goya_armcp_info_get(struct hl_device *hdev); void goya_init_security(struct hl_device *hdev); u64 goya_get_max_power(struct hl_device *hdev); void goya_set_max_power(struct hl_device *hdev, u64 value); From e1266004872c837db4ed8a72f066820ebb5d5353 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 7 Mar 2019 14:20:05 +0200 Subject: [PATCH 10/31] uapi/habanalabs: add some comments in habanalabs.h This patch adds two comments in uapi/habanalabs.h: - From which queue id the internal queues begin - Invalid values that can be returned in the seq field from the CS IOCTL Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 7fd6f633534c..ab1957e31077 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -33,7 +33,7 @@ enum goya_queue_id { GOYA_QUEUE_ID_DMA_3, GOYA_QUEUE_ID_DMA_4, GOYA_QUEUE_ID_CPU_PQ, - GOYA_QUEUE_ID_MME, + GOYA_QUEUE_ID_MME, /* Internal queues start here */ GOYA_QUEUE_ID_TPC0, GOYA_QUEUE_ID_TPC1, GOYA_QUEUE_ID_TPC2, @@ -181,7 +181,10 @@ struct hl_cs_in { }; struct hl_cs_out { - /* this holds the sequence number of the CS to pass to wait ioctl */ + /* + * seq holds the sequence number of the CS to pass to wait ioctl. All + * values are valid except for 0 and ULLONG_MAX + */ __u64 seq; /* HL_CS_STATUS_* */ __u32 status; From c811f7bc771f1cc3b1d932d5439757802806a789 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 7 Mar 2019 14:26:02 +0200 Subject: [PATCH 11/31] habanalabs: Add a printout with the name of a busy engine Print the name of a busy engine when checking if a device is idle. The change is done mainly to help a user to pinpoint problems in his topology's recipe. Signed-off-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 24 ++++++++++++---------- drivers/misc/habanalabs/habanalabs.h | 8 +++++++- drivers/misc/habanalabs/habanalabs_ioctl.c | 2 +- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3b34627500a2..630979422390 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2783,6 +2783,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) dma_addr_t fence_dma_addr; struct hl_cb *cb; u32 tmp, timeout; + char buf[16] = {}; int rc; if (hdev->pldm) @@ -2790,9 +2791,10 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) else timeout = HL_DEVICE_TIMEOUT_USEC; - if (!hdev->asic_funcs->is_device_idle(hdev)) { + if (!hdev->asic_funcs->is_device_idle(hdev, buf, sizeof(buf))) { dev_err_ratelimited(hdev->dev, - "Can't send KMD job on QMAN0 if device is not idle\n"); + "Can't send KMD job on QMAN0 because %s is busy\n", + buf); return -EBUSY; } @@ -4691,7 +4693,7 @@ static void goya_disable_clock_gating(struct hl_device *hdev) } -static bool goya_is_device_idle(struct hl_device *hdev) +static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size) { u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg; int i; @@ -4703,7 +4705,7 @@ static bool goya_is_device_idle(struct hl_device *hdev) if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) != DMA_QM_IDLE_MASK) - return false; + return HL_ENG_BUSY(buf, size, "DMA%d_QM", i); } offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0; @@ -4715,31 +4717,31 @@ static bool goya_is_device_idle(struct hl_device *hdev) if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) != TPC_QM_IDLE_MASK) - return false; + return HL_ENG_BUSY(buf, size, "TPC%d_QM", i); if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) != TPC_CMDQ_IDLE_MASK) - return false; + return HL_ENG_BUSY(buf, size, "TPC%d_CMDQ", i); if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) != TPC_CFG_IDLE_MASK) - return false; + return HL_ENG_BUSY(buf, size, "TPC%d_CFG", i); } if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) != MME_QM_IDLE_MASK) - return false; + return HL_ENG_BUSY(buf, size, "MME_QM"); if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) != MME_CMDQ_IDLE_MASK) - return false; + return HL_ENG_BUSY(buf, size, "MME_CMDQ"); if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) != MME_ARCH_IDLE_MASK) - return false; + return HL_ENG_BUSY(buf, size, "MME_ARCH"); if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK) - return false; + return HL_ENG_BUSY(buf, size, "MME"); return true; } diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index ed3649d38ff8..9cc841777d81 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -555,7 +555,7 @@ struct hl_asic_funcs { int (*send_heartbeat)(struct hl_device *hdev); void (*enable_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev); - bool (*is_device_idle)(struct hl_device *hdev); + bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size); int (*soft_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev); @@ -1010,6 +1010,12 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \ (val) << REG_FIELD_SHIFT(reg, field)) +#define HL_ENG_BUSY(buf, size, fmt, ...) ({ \ + if (buf) \ + snprintf(buf, size, fmt, ##__VA_ARGS__); \ + false; \ + }) + struct hwmon_chip_info; /** diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index 19b96afc0308..37f9de8e7404 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -93,7 +93,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev); + hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL, 0); return copy_to_user(out, &hw_idle, min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; From d75bcf3e5d60d4e586f3fb20f2cc9041667497a7 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 12 Mar 2019 13:53:17 +0200 Subject: [PATCH 12/31] habanalabs: Allow accessing DRAM virtual addresses via debugfs The addr/data32 debugfs nodes currently permit the access to only physical addresses of a device. This patch extends it and allows accessing also device's DRAM virtual addresses. Signed-off-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/debugfs.c | 96 +++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 974a87789bd8..a4447699ff4e 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -505,22 +505,97 @@ err: return -EINVAL; } +static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, + u64 *phys_addr) +{ + struct hl_ctx *ctx = hdev->user_ctx; + u64 hop_addr, hop_pte_addr, hop_pte; + int rc = 0; + + if (!ctx) { + dev_err(hdev->dev, "no ctx available\n"); + return -EINVAL; + } + + mutex_lock(&ctx->mmu_lock); + + /* hop 0 */ + hop_addr = get_hop0_addr(ctx); + hop_pte_addr = get_hop0_pte_addr(ctx, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + /* hop 1 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop1_pte_addr(ctx, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + /* hop 2 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop2_pte_addr(ctx, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + /* hop 3 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop3_pte_addr(ctx, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + if (!(hop_pte & LAST_MASK)) { + /* hop 4 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + } + + if (!(hop_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + + *phys_addr = (hop_pte & PTE_PHYS_ADDR_MASK) | (virt_addr & OFFSET_MASK); + + goto out; + +not_mapped: + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); + rc = -EINVAL; +out: + mutex_unlock(&ctx->mmu_lock); + return rc; +} + static ssize_t hl_data_read32(struct file *f, char __user *buf, size_t count, loff_t *ppos) { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; char tmp_buf[32]; + u64 addr = entry->addr; u32 val; ssize_t rc; if (*ppos) return 0; - rc = hdev->asic_funcs->debugfs_read32(hdev, entry->addr, &val); + if (addr >= prop->va_space_dram_start_address && + addr < prop->va_space_dram_end_address && + hdev->mmu_enable && + hdev->dram_supports_virtual_memory) { + rc = device_va_to_pa(hdev, entry->addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val); if (rc) { - dev_err(hdev->dev, "Failed to read from 0x%010llx\n", - entry->addr); + dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); return rc; } @@ -536,6 +611,8 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 addr = entry->addr; u32 value; ssize_t rc; @@ -543,10 +620,19 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, if (rc) return rc; - rc = hdev->asic_funcs->debugfs_write32(hdev, entry->addr, value); + if (addr >= prop->va_space_dram_start_address && + addr < prop->va_space_dram_end_address && + hdev->mmu_enable && + hdev->dram_supports_virtual_memory) { + rc = device_va_to_pa(hdev, entry->addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value); if (rc) { dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n", - value, entry->addr); + value, addr); return rc; } From 66542c3b9d2fe179f7392f01aaf49a5b432426a7 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Sun, 24 Feb 2019 09:17:55 +0200 Subject: [PATCH 13/31] habanalabs: add MMU shadow mapping This patch adds shadow mapping to the MMU module. The shadow mapping allows traversing the page table in host memory rather reading each PTE from the device memory. It brings better performance and avoids reading from invalid device address upon PCI errors. Only at the end of map/unmap flow, writings to the device are performed in order to sync the H/W page tables with the shadow ones. Signed-off-by: Omer Shpigelman Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/habanalabs.h | 24 +- .../include/hw_ip/mmu/mmu_general.h | 16 +- drivers/misc/habanalabs/mmu.c | 600 ++++++++++-------- 3 files changed, 356 insertions(+), 284 deletions(-) diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 9cc841777d81..6991a7f260e8 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -51,8 +51,9 @@ /** * struct pgt_info - MMU hop page info. - * @node: hash linked-list node for the pgts hash of pgts. - * @addr: physical address of the pgt. + * @node: hash linked-list node for the pgts shadow hash of pgts. + * @phys_addr: physical address of the pgt. + * @shadow_addr: shadow hop in the host. * @ctx: pointer to the owner ctx. * @num_of_ptes: indicates how many ptes are used in the pgt. * @@ -62,10 +63,11 @@ * page, it is freed with its pgt_info structure. */ struct pgt_info { - struct hlist_node node; - u64 addr; - struct hl_ctx *ctx; - int num_of_ptes; + struct hlist_node node; + u64 phys_addr; + u64 shadow_addr; + struct hl_ctx *ctx; + int num_of_ptes; }; struct hl_device; @@ -595,7 +597,8 @@ struct hl_va_range { * struct hl_ctx - user/kernel context. * @mem_hash: holds mapping from virtual address to virtual memory area * descriptor (hl_vm_phys_pg_list or hl_userptr). - * @mmu_hash: holds a mapping from virtual address to pgt_info structure. + * @mmu_phys_hash: holds a mapping from physical address to pgt_info structure. + * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure. * @hpriv: pointer to the private (KMD) data of the process (fd). * @hdev: pointer to the device structure. * @refcount: reference counter for the context. Context is released only when @@ -624,7 +627,8 @@ struct hl_va_range { */ struct hl_ctx { DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS); - DECLARE_HASHTABLE(mmu_hash, MMU_HASH_TABLE_BITS); + DECLARE_HASHTABLE(mmu_phys_hash, MMU_HASH_TABLE_BITS); + DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS); struct hl_fpriv *hpriv; struct hl_device *hdev; struct kref refcount; @@ -1066,7 +1070,8 @@ struct hl_device_reset_work { * @asic_specific: ASIC specific information to use only from ASIC files. * @mmu_pgt_pool: pool of available MMU hops. * @vm: virtual memory manager for MMU. - * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context + * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context. + * @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone. * @hwmon_dev: H/W monitor device. * @pm_mng_profile: current power management profile. * @hl_chip_info: ASIC's sensors information. @@ -1136,6 +1141,7 @@ struct hl_device { struct gen_pool *mmu_pgt_pool; struct hl_vm vm; struct mutex mmu_cache_lock; + void *mmu_shadow_hop0; struct device *hwmon_dev; enum hl_pm_mng_profile pm_mng_profile; struct hwmon_chip_info *hl_chip_info; diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h index b680052ee3f0..71ea3c3e8ba3 100644 --- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h +++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h @@ -14,16 +14,16 @@ #define PAGE_SIZE_4KB (_AC(1, UL) << PAGE_SHIFT_4KB) #define PAGE_MASK_2MB (~(PAGE_SIZE_2MB - 1)) -#define PAGE_PRESENT_MASK 0x0000000000001 -#define SWAP_OUT_MASK 0x0000000000004 -#define LAST_MASK 0x0000000000800 -#define PHYS_ADDR_MASK 0x3FFFFFFFFF000ull +#define PAGE_PRESENT_MASK 0x0000000000001ull +#define SWAP_OUT_MASK 0x0000000000004ull +#define LAST_MASK 0x0000000000800ull +#define PHYS_ADDR_MASK 0xFFFFFFFFFFFFF000ull #define HOP0_MASK 0x3000000000000ull #define HOP1_MASK 0x0FF8000000000ull #define HOP2_MASK 0x0007FC0000000ull -#define HOP3_MASK 0x000003FE00000 -#define HOP4_MASK 0x00000001FF000 -#define OFFSET_MASK 0x0000000000FFF +#define HOP3_MASK 0x000003FE00000ull +#define HOP4_MASK 0x00000001FF000ull +#define OFFSET_MASK 0x0000000000FFFull #define HOP0_SHIFT 48 #define HOP1_SHIFT 39 @@ -32,7 +32,7 @@ #define HOP4_SHIFT 12 #define PTE_PHYS_ADDR_SHIFT 12 -#define PTE_PHYS_ADDR_MASK ~0xFFF +#define PTE_PHYS_ADDR_MASK ~OFFSET_MASK #define HL_PTE_SIZE sizeof(u64) #define HOP_TABLE_SIZE PAGE_SIZE_4KB diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c index 3a5a2cec8305..533d9315b6fb 100644 --- a/drivers/misc/habanalabs/mmu.c +++ b/drivers/misc/habanalabs/mmu.c @@ -11,13 +11,15 @@ #include #include -static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 addr) +static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); + +static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) { struct pgt_info *pgt_info = NULL; - hash_for_each_possible(ctx->mmu_hash, pgt_info, node, - (unsigned long) addr) - if (addr == pgt_info->addr) + hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node, + (unsigned long) hop_addr) + if (hop_addr == pgt_info->shadow_addr) break; return pgt_info; @@ -25,45 +27,109 @@ static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 addr) static void free_hop(struct hl_ctx *ctx, u64 hop_addr) { + struct hl_device *hdev = ctx->hdev; struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); - gen_pool_free(pgt_info->ctx->hdev->mmu_pgt_pool, pgt_info->addr, - ctx->hdev->asic_prop.mmu_hop_table_size); + gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr, + hdev->asic_prop.mmu_hop_table_size); hash_del(&pgt_info->node); - + kfree((u64 *) (uintptr_t) pgt_info->shadow_addr); kfree(pgt_info); } static u64 alloc_hop(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; struct pgt_info *pgt_info; - u64 addr; + u64 phys_addr, shadow_addr; pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL); if (!pgt_info) return ULLONG_MAX; - addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool, - hdev->asic_prop.mmu_hop_table_size); - if (!addr) { + phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool, + prop->mmu_hop_table_size); + if (!phys_addr) { dev_err(hdev->dev, "failed to allocate page\n"); - kfree(pgt_info); - return ULLONG_MAX; + goto pool_add_err; } - pgt_info->addr = addr; + shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size, + GFP_KERNEL); + if (!shadow_addr) + goto shadow_err; + + pgt_info->phys_addr = phys_addr; + pgt_info->shadow_addr = shadow_addr; pgt_info->ctx = ctx; pgt_info->num_of_ptes = 0; - hash_add(ctx->mmu_hash, &pgt_info->node, addr); + hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr); - return addr; + return shadow_addr; + +shadow_err: + gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size); +pool_add_err: + kfree(pgt_info); + + return ULLONG_MAX; } -static inline void clear_pte(struct hl_device *hdev, u64 pte_addr) +static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx) { - /* clear the last and present bits */ - hdev->asic_funcs->write_pte(hdev, pte_addr, 0); + return ctx->hdev->asic_prop.mmu_pgt_addr + + (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); +} + +static inline u64 get_hop0_addr(struct hl_ctx *ctx) +{ + return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 + + (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); +} + +static inline void flush(struct hl_ctx *ctx) +{ + /* flush all writes from all cores to reach PCI */ + mb(); + ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx)); +} + +/* transform the value to physical address when writing to H/W */ +static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) +{ + /* + * The value to write is actually the address of the next shadow hop + + * flags at the 12 LSBs. + * Hence in order to get the value to write to the physical PTE, we + * clear the 12 LSBs and translate the shadow hop to its associated + * physical hop, and add back the original 12 LSBs. + */ + u64 phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) | + (val & OFFSET_MASK); + + ctx->hdev->asic_funcs->write_pte(ctx->hdev, + get_phys_addr(ctx, shadow_pte_addr), + phys_val); + + *(u64 *) (uintptr_t) shadow_pte_addr = val; +} + +/* do not transform the value to physical address when writing to H/W */ +static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, + u64 val) +{ + ctx->hdev->asic_funcs->write_pte(ctx->hdev, + get_phys_addr(ctx, shadow_pte_addr), + val); + *(u64 *) (uintptr_t) shadow_pte_addr = val; +} + +/* clear the last and present bits */ +static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr) +{ + /* no need to transform the value to physical address */ + write_final_pte(ctx, pte_addr, 0); } static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr) @@ -98,12 +164,6 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr) return num_of_ptes_left; } -static inline u64 get_hop0_addr(struct hl_ctx *ctx) -{ - return ctx->hdev->asic_prop.mmu_pgt_addr + - (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); -} - static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 virt_addr, u64 mask, u64 shift) { @@ -136,7 +196,7 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT); } -static inline u64 get_next_hop_addr(u64 curr_pte) +static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) { if (curr_pte & PAGE_PRESENT_MASK) return curr_pte & PHYS_ADDR_MASK; @@ -147,7 +207,7 @@ static inline u64 get_next_hop_addr(u64 curr_pte) static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop) { - u64 hop_addr = get_next_hop_addr(curr_pte); + u64 hop_addr = get_next_hop_addr(ctx, curr_pte); if (hop_addr == ULLONG_MAX) { hop_addr = alloc_hop(ctx); @@ -157,106 +217,30 @@ static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, return hop_addr; } -/* - * hl_mmu_init - init the mmu module - * - * @hdev: pointer to the habanalabs device structure - * - * This function does the following: - * - Allocate max_asid zeroed hop0 pgts so no mapping is available - * - Enable mmu in hw - * - Invalidate the mmu cache - * - Create a pool of pages for pgts - * - Returns 0 on success - * - * This function depends on DMA QMAN to be working! - */ -int hl_mmu_init(struct hl_device *hdev) +/* translates shadow address inside hop to a physical address */ +static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr) { - struct asic_fixed_properties *prop = &hdev->asic_prop; - int rc; + u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1); + u64 shadow_hop_addr = shadow_addr & ~page_mask; + u64 pte_offset = shadow_addr & page_mask; + u64 phys_hop_addr; - if (!hdev->mmu_enable) - return 0; + if (shadow_hop_addr != get_hop0_addr(ctx)) + phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr; + else + phys_hop_addr = get_phys_hop0_addr(ctx); - /* MMU HW init was already done in device hw_init() */ - - mutex_init(&hdev->mmu_cache_lock); - - hdev->mmu_pgt_pool = - gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); - - if (!hdev->mmu_pgt_pool) { - dev_err(hdev->dev, "Failed to create page gen pool\n"); - rc = -ENOMEM; - goto err_pool_create; - } - - rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr + - prop->mmu_hop0_tables_total_size, - prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size, - -1); - if (rc) { - dev_err(hdev->dev, "Failed to add memory to page gen pool\n"); - goto err_pool_add; - } - - return 0; - -err_pool_add: - gen_pool_destroy(hdev->mmu_pgt_pool); -err_pool_create: - mutex_destroy(&hdev->mmu_cache_lock); - - return rc; + return phys_hop_addr + pte_offset; } -/* - * hl_mmu_fini - release the mmu module. - * - * @hdev: pointer to the habanalabs device structure - * - * This function does the following: - * - Disable mmu in hw - * - free the pgts pool - * - * All ctxs should be freed before calling this func - */ -void hl_mmu_fini(struct hl_device *hdev) -{ - if (!hdev->mmu_enable) - return; - - gen_pool_destroy(hdev->mmu_pgt_pool); - - mutex_destroy(&hdev->mmu_cache_lock); - - /* MMU HW fini will be done in device hw_fini() */ -} - -/** - * hl_mmu_ctx_init() - initialize a context for using the MMU module. - * @ctx: pointer to the context structure to initialize. - * - * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all - * page tables hops related to this context and an optional DRAM default page - * mapping. - * Return: 0 on success, non-zero otherwise. - */ -int hl_mmu_ctx_init(struct hl_ctx *ctx) +static int dram_default_mapping_init(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 num_of_hop3, total_hops, hop1_addr, hop2_addr, hop2_pte_addr, - hop3_pte_addr, pte_val; + u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr, + hop2_pte_addr, hop3_pte_addr, pte_val; int rc, i, j, hop3_allocated = 0; - if (!hdev->mmu_enable) - return 0; - - mutex_init(&ctx->mmu_lock); - hash_init(ctx->mmu_hash); - if (!hdev->dram_supports_virtual_memory || !hdev->dram_default_page_mapping) return 0; @@ -269,10 +253,10 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx) total_hops = num_of_hop3 + 2; ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL); - if (!ctx->dram_default_hops) { - rc = -ENOMEM; - goto alloc_err; - } + if (!ctx->dram_default_hops) + return -ENOMEM; + + hop0_addr = get_hop0_addr(ctx); hop1_addr = alloc_hop(ctx); if (hop1_addr == ULLONG_MAX) { @@ -304,17 +288,17 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx) /* need only pte 0 in hops 0 and 1 */ pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - hdev->asic_funcs->write_pte(hdev, get_hop0_addr(ctx), pte_val); + write_pte(ctx, hop0_addr, pte_val); pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - hdev->asic_funcs->write_pte(hdev, hop1_addr, pte_val); + write_pte(ctx, hop1_addr, pte_val); get_pte(ctx, hop1_addr); hop2_pte_addr = hop2_addr; for (i = 0 ; i < num_of_hop3 ; i++) { pte_val = (ctx->dram_default_hops[i] & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - hdev->asic_funcs->write_pte(hdev, hop2_pte_addr, pte_val); + write_pte(ctx, hop2_pte_addr, pte_val); get_pte(ctx, hop2_addr); hop2_pte_addr += HL_PTE_SIZE; } @@ -325,33 +309,183 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx) for (i = 0 ; i < num_of_hop3 ; i++) { hop3_pte_addr = ctx->dram_default_hops[i]; for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) { - hdev->asic_funcs->write_pte(hdev, hop3_pte_addr, - pte_val); + write_final_pte(ctx, hop3_pte_addr, pte_val); get_pte(ctx, ctx->dram_default_hops[i]); hop3_pte_addr += HL_PTE_SIZE; } } - /* flush all writes to reach PCI */ - mb(); - hdev->asic_funcs->read_pte(hdev, hop2_addr); + flush(ctx); return 0; hop3_err: for (i = 0 ; i < hop3_allocated ; i++) free_hop(ctx, ctx->dram_default_hops[i]); + free_hop(ctx, hop2_addr); hop2_err: free_hop(ctx, hop1_addr); hop1_err: kfree(ctx->dram_default_hops); -alloc_err: - mutex_destroy(&ctx->mmu_lock); return rc; } +static void dram_default_mapping_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr, + hop2_pte_addr, hop3_pte_addr; + int i, j; + + if (!hdev->dram_supports_virtual_memory || + !hdev->dram_default_page_mapping) + return; + + num_of_hop3 = prop->dram_size_for_default_page_mapping; + do_div(num_of_hop3, prop->dram_page_size); + do_div(num_of_hop3, PTE_ENTRIES_IN_HOP); + + hop0_addr = get_hop0_addr(ctx); + /* add hop1 and hop2 */ + total_hops = num_of_hop3 + 2; + hop1_addr = ctx->dram_default_hops[total_hops - 1]; + hop2_addr = ctx->dram_default_hops[total_hops - 2]; + + for (i = 0 ; i < num_of_hop3 ; i++) { + hop3_pte_addr = ctx->dram_default_hops[i]; + for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) { + clear_pte(ctx, hop3_pte_addr); + put_pte(ctx, ctx->dram_default_hops[i]); + hop3_pte_addr += HL_PTE_SIZE; + } + } + + hop2_pte_addr = hop2_addr; + hop2_pte_addr = hop2_addr; + for (i = 0 ; i < num_of_hop3 ; i++) { + clear_pte(ctx, hop2_pte_addr); + put_pte(ctx, hop2_addr); + hop2_pte_addr += HL_PTE_SIZE; + } + + clear_pte(ctx, hop1_addr); + put_pte(ctx, hop1_addr); + clear_pte(ctx, hop0_addr); + + kfree(ctx->dram_default_hops); + + flush(ctx); +} + +/** + * hl_mmu_init() - initialize the MMU module. + * @hdev: habanalabs device structure. + * + * This function does the following: + * - Allocate max_asid zeroed hop0 pgts so no mapping is available. + * - Enable MMU in H/W. + * - Invalidate the MMU cache. + * - Create a pool of pages for pgt_infos. + * + * This function depends on DMA QMAN to be working! + * + * Return: 0 for success, non-zero for failure. + */ +int hl_mmu_init(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + if (!hdev->mmu_enable) + return 0; + + /* MMU H/W init was already done in device hw_init() */ + + mutex_init(&hdev->mmu_cache_lock); + + hdev->mmu_pgt_pool = + gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); + + if (!hdev->mmu_pgt_pool) { + dev_err(hdev->dev, "Failed to create page gen pool\n"); + rc = -ENOMEM; + goto err_pool_create; + } + + rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr + + prop->mmu_hop0_tables_total_size, + prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size, + -1); + if (rc) { + dev_err(hdev->dev, "Failed to add memory to page gen pool\n"); + goto err_pool_add; + } + + hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid, + prop->mmu_hop_table_size, + GFP_KERNEL | __GFP_ZERO); + if (!hdev->mmu_shadow_hop0) { + rc = -ENOMEM; + goto err_pool_add; + } + + return 0; + +err_pool_add: + gen_pool_destroy(hdev->mmu_pgt_pool); +err_pool_create: + mutex_destroy(&hdev->mmu_cache_lock); + + return rc; +} + +/** + * hl_mmu_fini() - release the MMU module. + * @hdev: habanalabs device structure. + * + * This function does the following: + * - Disable MMU in H/W. + * - Free the pgt_infos pool. + * + * All contexts should be freed before calling this function. + */ +void hl_mmu_fini(struct hl_device *hdev) +{ + if (!hdev->mmu_enable) + return; + + kvfree(hdev->mmu_shadow_hop0); + gen_pool_destroy(hdev->mmu_pgt_pool); + mutex_destroy(&hdev->mmu_cache_lock); + + /* MMU H/W fini will be done in device hw_fini() */ +} + +/** + * hl_mmu_ctx_init() - initialize a context for using the MMU module. + * @ctx: pointer to the context structure to initialize. + * + * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all + * page tables hops related to this context. + * Return: 0 on success, non-zero otherwise. + */ +int hl_mmu_ctx_init(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (!hdev->mmu_enable) + return 0; + + mutex_init(&ctx->mmu_lock); + hash_init(ctx->mmu_phys_hash); + hash_init(ctx->mmu_shadow_hash); + + return dram_default_mapping_init(ctx); +} + /* * hl_mmu_ctx_fini - disable a ctx from using the mmu module * @@ -365,63 +499,23 @@ alloc_err: void hl_mmu_ctx_fini(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; struct pgt_info *pgt_info; struct hlist_node *tmp; - u64 num_of_hop3, total_hops, hop1_addr, hop2_addr, hop2_pte_addr, - hop3_pte_addr; - int i, j; + int i; - if (!ctx->hdev->mmu_enable) + if (!hdev->mmu_enable) return; - if (hdev->dram_supports_virtual_memory && - hdev->dram_default_page_mapping) { + dram_default_mapping_fini(ctx); - num_of_hop3 = prop->dram_size_for_default_page_mapping; - do_div(num_of_hop3, prop->dram_page_size); - do_div(num_of_hop3, PTE_ENTRIES_IN_HOP); - - /* add hop1 and hop2 */ - total_hops = num_of_hop3 + 2; - hop1_addr = ctx->dram_default_hops[total_hops - 1]; - hop2_addr = ctx->dram_default_hops[total_hops - 2]; - - for (i = 0 ; i < num_of_hop3 ; i++) { - hop3_pte_addr = ctx->dram_default_hops[i]; - for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) { - clear_pte(hdev, hop3_pte_addr); - put_pte(ctx, ctx->dram_default_hops[i]); - hop3_pte_addr += HL_PTE_SIZE; - } - } - - hop2_pte_addr = hop2_addr; - for (i = 0 ; i < num_of_hop3 ; i++) { - clear_pte(hdev, hop2_pte_addr); - put_pte(ctx, hop2_addr); - hop2_pte_addr += HL_PTE_SIZE; - } - - clear_pte(hdev, hop1_addr); - put_pte(ctx, hop1_addr); - clear_pte(hdev, get_hop0_addr(ctx)); - - kfree(ctx->dram_default_hops); - - /* flush all writes to reach PCI */ - mb(); - hdev->asic_funcs->read_pte(hdev, hop2_addr); - } - - if (!hash_empty(ctx->mmu_hash)) + if (!hash_empty(ctx->mmu_shadow_hash)) dev_err(hdev->dev, "ctx is freed while it has pgts in use\n"); - hash_for_each_safe(ctx->mmu_hash, i, tmp, pgt_info, node) { + hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) { dev_err(hdev->dev, "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", - pgt_info->addr, ctx->asid, pgt_info->num_of_ptes); - free_hop(ctx, pgt_info->addr); + pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); + free_hop(ctx, pgt_info->shadow_addr); } mutex_destroy(&ctx->mmu_lock); @@ -437,45 +531,43 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) hop3_addr = 0, hop3_pte_addr = 0, hop4_addr = 0, hop4_pte_addr = 0, curr_pte; - int clear_hop3 = 1; - bool is_dram_addr, is_huge, is_dram_default_page_mapping; + bool is_dram_addr, is_huge, clear_hop3 = true; is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB, prop->va_space_dram_start_address, prop->va_space_dram_end_address); hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); - curr_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr); + curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; - hop1_addr = get_next_hop_addr(curr_pte); + hop1_addr = get_next_hop_addr(ctx, curr_pte); if (hop1_addr == ULLONG_MAX) goto not_mapped; hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); - curr_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr); + curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; - hop2_addr = get_next_hop_addr(curr_pte); + hop2_addr = get_next_hop_addr(ctx, curr_pte); if (hop2_addr == ULLONG_MAX) goto not_mapped; hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); - curr_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr); + curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; - hop3_addr = get_next_hop_addr(curr_pte); + hop3_addr = get_next_hop_addr(ctx, curr_pte); if (hop3_addr == ULLONG_MAX) goto not_mapped; hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); - curr_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr); + curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; is_huge = curr_pte & LAST_MASK; @@ -485,27 +577,24 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) return -EFAULT; } - is_dram_default_page_mapping = - hdev->dram_default_page_mapping && is_dram_addr; - if (!is_huge) { - hop4_addr = get_next_hop_addr(curr_pte); + hop4_addr = get_next_hop_addr(ctx, curr_pte); if (hop4_addr == ULLONG_MAX) goto not_mapped; hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); - curr_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr); + curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; - clear_hop3 = 0; + clear_hop3 = false; } - if (is_dram_default_page_mapping) { - u64 zero_pte = (prop->mmu_dram_default_page_addr & + if (hdev->dram_default_page_mapping && is_dram_addr) { + u64 default_pte = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; - if (curr_pte == zero_pte) { + if (curr_pte == default_pte) { dev_err(hdev->dev, "DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n", virt_addr); @@ -519,40 +608,43 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) goto not_mapped; } - hdev->asic_funcs->write_pte(hdev, hop3_pte_addr, zero_pte); + write_final_pte(ctx, hop3_pte_addr, default_pte); put_pte(ctx, hop3_addr); } else { if (!(curr_pte & PAGE_PRESENT_MASK)) goto not_mapped; - clear_pte(hdev, hop4_addr ? hop4_pte_addr : hop3_pte_addr); + if (hop4_addr) + clear_pte(ctx, hop4_pte_addr); + else + clear_pte(ctx, hop3_pte_addr); if (hop4_addr && !put_pte(ctx, hop4_addr)) - clear_hop3 = 1; + clear_hop3 = true; if (!clear_hop3) goto flush; - clear_pte(hdev, hop3_pte_addr); + + clear_pte(ctx, hop3_pte_addr); if (put_pte(ctx, hop3_addr)) goto flush; - clear_pte(hdev, hop2_pte_addr); + + clear_pte(ctx, hop2_pte_addr); if (put_pte(ctx, hop2_addr)) goto flush; - clear_pte(hdev, hop1_pte_addr); + + clear_pte(ctx, hop1_pte_addr); if (put_pte(ctx, hop1_addr)) goto flush; - clear_pte(hdev, hop0_pte_addr); + + clear_pte(ctx, hop0_pte_addr); } flush: - /* flush all writes from all cores to reach PCI */ - mb(); - - hdev->asic_funcs->read_pte(hdev, - hop4_addr ? hop4_pte_addr : hop3_pte_addr); + flush(ctx); return 0; @@ -632,8 +724,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, hop4_addr = 0, hop4_pte_addr = 0, curr_pte = 0; bool hop1_new = false, hop2_new = false, hop3_new = false, - hop4_new = false, is_huge, is_dram_addr, - is_dram_default_page_mapping; + hop4_new = false, is_huge, is_dram_addr; int rc = -ENOMEM; /* @@ -654,59 +745,46 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, return -EFAULT; } - is_dram_default_page_mapping = - hdev->dram_default_page_mapping && is_dram_addr; - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); - - curr_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr); + curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new); - if (hop1_addr == ULLONG_MAX) goto err; hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); - - curr_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr); + curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new); - if (hop2_addr == ULLONG_MAX) goto err; hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); - - curr_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr); + curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new); - if (hop3_addr == ULLONG_MAX) goto err; hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); - - curr_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr); + curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; if (!is_huge) { hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new); - if (hop4_addr == ULLONG_MAX) goto err; hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); - - curr_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr); + curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; } - if (is_dram_default_page_mapping) { - u64 zero_pte = (prop->mmu_dram_default_page_addr & + if (hdev->dram_default_page_mapping && is_dram_addr) { + u64 default_pte = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; - if (curr_pte != zero_pte) { + if (curr_pte != default_pte) { dev_err(hdev->dev, "DRAM: mapping already exists for virt_addr 0x%llx\n", virt_addr); @@ -722,27 +800,22 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, } } else if (curr_pte & PAGE_PRESENT_MASK) { dev_err(hdev->dev, - "mapping already exists for virt_addr 0x%llx\n", - virt_addr); + "mapping already exists for virt_addr 0x%llx\n", + virt_addr); dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n", - hdev->asic_funcs->read_pte(hdev, hop0_pte_addr), - hop0_pte_addr); + *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr); dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n", - hdev->asic_funcs->read_pte(hdev, hop1_pte_addr), - hop1_pte_addr); + *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr); dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n", - hdev->asic_funcs->read_pte(hdev, hop2_pte_addr), - hop2_pte_addr); + *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr); dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n", - hdev->asic_funcs->read_pte(hdev, hop3_pte_addr), - hop3_pte_addr); + *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr); if (!is_huge) dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n", - hdev->asic_funcs->read_pte(hdev, - hop4_pte_addr), - hop4_pte_addr); + *(u64 *) (uintptr_t) hop4_pte_addr, + hop4_pte_addr); rc = -EINVAL; goto err; @@ -751,28 +824,26 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; - hdev->asic_funcs->write_pte(hdev, - is_huge ? hop3_pte_addr : hop4_pte_addr, - curr_pte); + if (is_huge) + write_final_pte(ctx, hop3_pte_addr, curr_pte); + else + write_final_pte(ctx, hop4_pte_addr, curr_pte); if (hop1_new) { - curr_pte = (hop1_addr & PTE_PHYS_ADDR_MASK) | - PAGE_PRESENT_MASK; - ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop0_pte_addr, - curr_pte); + curr_pte = + (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop0_pte_addr, curr_pte); } if (hop2_new) { - curr_pte = (hop2_addr & PTE_PHYS_ADDR_MASK) | - PAGE_PRESENT_MASK; - ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop1_pte_addr, - curr_pte); + curr_pte = + (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop1_pte_addr, curr_pte); get_pte(ctx, hop1_addr); } if (hop3_new) { - curr_pte = (hop3_addr & PTE_PHYS_ADDR_MASK) | - PAGE_PRESENT_MASK; - ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop2_pte_addr, - curr_pte); + curr_pte = + (hop3_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop2_pte_addr, curr_pte); get_pte(ctx, hop2_addr); } @@ -780,8 +851,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, if (hop4_new) { curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - ctx->hdev->asic_funcs->write_pte(ctx->hdev, - hop3_pte_addr, curr_pte); + write_pte(ctx, hop3_pte_addr, curr_pte); get_pte(ctx, hop3_addr); } @@ -790,11 +860,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, get_pte(ctx, hop3_addr); } - /* flush all writes from all cores to reach PCI */ - mb(); - - hdev->asic_funcs->read_pte(hdev, - is_huge ? hop3_pte_addr : hop4_pte_addr); + flush(ctx); return 0; From d9973871dae1805678ac905318a5d4cecceb6524 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 7 Mar 2019 18:03:23 +0200 Subject: [PATCH 14/31] habanalabs: keep track of the device's dma mask This patch refactors the code that is responsible to set the DMA mask for the device. Upon each change of the dma mask, the driver will save the new value that was set. This is needed in order to make sure we don't try to increase the mask a second time, in case we failed in the first time. This is especially relevant for Power machines, as that may cause a change in configuration of the TVT which will break the device. Goya will first try to set the device's dma mask to 39 bits, so that the memory that is allocated on the host machine for communication with the device's cpu will be in a bus address which is lower then 39 bits. Later, Goya will try to increase that mask to 48 bits, but only if setting the mask to 39 bits was successful. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 32 ++++-------- drivers/misc/habanalabs/habanalabs.h | 5 +- drivers/misc/habanalabs/habanalabs_drv.c | 3 ++ drivers/misc/habanalabs/pci.c | 64 ++++++++++++++++++------ 4 files changed, 65 insertions(+), 39 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 630979422390..c69f3b928350 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -482,7 +482,7 @@ static int goya_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); - rc = hl_pci_init(hdev); + rc = hl_pci_init(hdev, 39); if (rc) return rc; @@ -2445,28 +2445,16 @@ static int goya_hw_init(struct hl_device *hdev) goto disable_msix; } - /* CPU initialization is finished, we can now move to 48 bit DMA mask */ - rc = pci_set_dma_mask(hdev->pdev, DMA_BIT_MASK(48)); - if (rc) { - dev_warn(hdev->dev, "Unable to set pci dma mask to 48 bits\n"); - rc = pci_set_dma_mask(hdev->pdev, DMA_BIT_MASK(32)); - if (rc) { - dev_err(hdev->dev, - "Unable to set pci dma mask to 32 bits\n"); + /* + * Check if we managed to set the DMA mask to more then 32 bits. If so, + * let's try to increase it again because in Goya we set the initial + * dma mask to less then 39 bits so that the allocation of the memory + * area for the device's cpu will be under 39 bits + */ + if (hdev->dma_mask > 32) { + rc = hl_pci_set_dma_mask(hdev, 48); + if (rc) goto disable_pci_access; - } - } - - rc = pci_set_consistent_dma_mask(hdev->pdev, DMA_BIT_MASK(48)); - if (rc) { - dev_warn(hdev->dev, - "Unable to set pci consistent dma mask to 48 bits\n"); - rc = pci_set_consistent_dma_mask(hdev->pdev, DMA_BIT_MASK(32)); - if (rc) { - dev_err(hdev->dev, - "Unable to set pci consistent dma mask to 32 bits\n"); - goto disable_pci_access; - } } /* Perform read from the device to flush all MSI-X configuration */ diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 6991a7f260e8..e9253d937bfc 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -1106,6 +1106,7 @@ struct hl_device_reset_work { * @init_done: is the initialization of the device done. * @mmu_enable: is MMU enabled. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) + * @dma_mask: the dma mask that was set for this device */ struct hl_device { struct pci_dev *pdev; @@ -1176,6 +1177,7 @@ struct hl_device { u8 dram_default_page_mapping; u8 init_done; u8 device_cpu_disabled; + u8 dma_mask; /* Parameters for bring-up */ u8 mmu_enable; @@ -1397,8 +1399,9 @@ int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar, u64 addr); int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address, u64 dram_base_address, u64 host_phys_size); -int hl_pci_init(struct hl_device *hdev); +int hl_pci_init(struct hl_device *hdev, u8 dma_mask); void hl_pci_fini(struct hl_device *hdev); +int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask); long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c index 748601463f11..b697339d3904 100644 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ b/drivers/misc/habanalabs/habanalabs_drv.c @@ -229,6 +229,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->asic_type = asic_type; } + /* Set default DMA mask to 32 bits */ + hdev->dma_mask = 32; + mutex_lock(&hl_devs_idr_lock); if (minor == -1) { diff --git a/drivers/misc/habanalabs/pci.c b/drivers/misc/habanalabs/pci.c index 822ac110b997..d472d02a8e6e 100644 --- a/drivers/misc/habanalabs/pci.c +++ b/drivers/misc/habanalabs/pci.c @@ -287,42 +287,74 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address, } /** - * hl_pci_init() - PCI initialization code. + * hl_pci_set_dma_mask() - Set DMA masks for the device. * @hdev: Pointer to hl_device structure. + * @dma_mask: number of bits for the requested dma mask. * - * Set DMA masks, initialize the PCI controller and map the PCI BARs. + * This function sets the DMA masks (regular and consistent) for a specified + * value. If it doesn't succeed, it tries to set it to a fall-back value * * Return: 0 on success, non-zero for failure. */ -int hl_pci_init(struct hl_device *hdev) +int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask) { struct pci_dev *pdev = hdev->pdev; int rc; /* set DMA mask */ - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_mask)); if (rc) { - dev_warn(hdev->dev, "Unable to set pci dma mask to 39 bits\n"); - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + dev_warn(hdev->dev, + "Failed to set pci dma mask to %d bits, error %d\n", + dma_mask, rc); + + dma_mask = hdev->dma_mask; + + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_mask)); if (rc) { dev_err(hdev->dev, - "Unable to set pci dma mask to 32 bits\n"); + "Failed to set pci dma mask to %d bits, error %d\n", + dma_mask, rc); return rc; } } - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); + /* + * We managed to set the dma mask, so update the dma mask field. If + * the set to the coherent mask will fail with that mask, we will + * fail the entire function + */ + hdev->dma_mask = dma_mask; + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_mask)); if (rc) { - dev_warn(hdev->dev, - "Unable to set pci consistent dma mask to 39 bits\n"); - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) { - dev_err(hdev->dev, - "Unable to set pci consistent dma mask to 32 bits\n"); - return rc; - } + dev_err(hdev->dev, + "Failed to set pci consistent dma mask to %d bits, error %d\n", + dma_mask, rc); + return rc; } + return 0; +} + +/** + * hl_pci_init() - PCI initialization code. + * @hdev: Pointer to hl_device structure. + * @dma_mask: number of bits for the requested dma mask. + * + * Set DMA masks, initialize the PCI controller and map the PCI BARs. + * + * Return: 0 on success, non-zero for failure. + */ +int hl_pci_init(struct hl_device *hdev, u8 dma_mask) +{ + struct pci_dev *pdev = hdev->pdev; + int rc; + + rc = hl_pci_set_dma_mask(hdev, dma_mask); + if (rc) + return rc; + if (hdev->reset_pcilink) hl_pci_reset_link_through_bridge(hdev); From 0878a42086e0a6228f804655bb544147a531bb57 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 17 Mar 2019 09:12:29 +0200 Subject: [PATCH 15/31] habanalabs: never fail hard reset of device Hard-reset of our device should never fail, due to dangers of permanent damage to the H/W. This patch removes the last place in the reset path where the driver might exit before doing the actual reset. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/device.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 77d51be66c7e..c51d1062d0bc 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -663,17 +663,9 @@ again: /* Go over all the queues, release all CS and their jobs */ hl_cs_rollback_all(hdev); - if (hard_reset) { - /* Release kernel context */ - if (hl_ctx_put(hdev->kernel_ctx) != 1) { - dev_err(hdev->dev, - "kernel ctx is alive during hard reset\n"); - rc = -EBUSY; - goto out_err; - } - + /* Release kernel context */ + if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1)) hdev->kernel_ctx = NULL; - } /* Reset the H/W. It will be in idle state after this returns */ hdev->asic_funcs->hw_fini(hdev, hard_reset); @@ -699,6 +691,13 @@ again: if (hard_reset) { hdev->device_cpu_disabled = false; + if (hdev->kernel_ctx) { + dev_crit(hdev->dev, + "kernel ctx was alive during hard reset, something is terribly wrong\n"); + rc = -EBUSY; + goto out_err; + } + /* Allocate the kernel context */ hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); From a691a1ebb59e7f2a159df8d3c693e2235c5de9dd Mon Sep 17 00:00:00 2001 From: Dalit Ben Zoor Date: Wed, 20 Mar 2019 16:13:23 +0200 Subject: [PATCH 16/31] habanalabs: set new golden value to tpc clock relaxation On init or context switch, set TPC clock relaxation counter register to a golden value. Signed-off-by: Dalit Ben Zoor Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index c69f3b928350..9f775dd81a52 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -1436,6 +1436,7 @@ static void goya_init_golden_registers(struct hl_device *hdev) */ WREG32(mmDMA_CH_0_CFG0, 0x0fff0010); WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0); + WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020); goya->hw_cap_initialized |= HW_CAP_GOLDEN; } @@ -4458,6 +4459,7 @@ static int goya_context_switch(struct hl_device *hdev, u32 asid) return rc; } + WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020); goya_mmu_prepare(hdev, asid); return 0; From 9354c29ed52762e9645d6dc2631045228ed10fbd Mon Sep 17 00:00:00 2001 From: Dalit Ben Zoor Date: Thu, 21 Mar 2019 09:54:49 +0200 Subject: [PATCH 17/31] habanalabs: allow user to modify TPC clock relaxation value This patch allows the user to modify the TPC PLL clock relaxation value on-the-fly in order to reduce power consumption. To enable this, the patch removes the protection from the specific register that controls this behavior. Signed-off-by: Dalit Ben Zoor Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya_security.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya_security.c b/drivers/misc/habanalabs/goya/goya_security.c index 9d2dee67e46f..d95d1b2f860d 100644 --- a/drivers/misc/habanalabs/goya/goya_security.c +++ b/drivers/misc/habanalabs/goya/goya_security.c @@ -2160,6 +2160,8 @@ static void goya_init_protection_bits(struct hl_device *hdev) * Bits 7-11 represents the word offset inside the 128 bytes. * Bits 2-6 represents the bit location inside the word. */ + u32 pb_addr, mask; + u8 word_offset; goya_pb_set_block(hdev, mmPCI_NRTR_BASE); goya_pb_set_block(hdev, mmPCI_RD_REGULATOR_BASE); @@ -2238,6 +2240,14 @@ static void goya_init_protection_bits(struct hl_device *hdev) goya_pb_set_block(hdev, mmPCIE_AUX_BASE); goya_pb_set_block(hdev, mmPCIE_DB_RSV_BASE); goya_pb_set_block(hdev, mmPCIE_PHY_BASE); + goya_pb_set_block(hdev, mmTPC0_NRTR_BASE); + goya_pb_set_block(hdev, mmTPC_PLL_BASE); + + pb_addr = (mmTPC_PLL_CLK_RLX_0 & ~0xFFF) + PROT_BITS_OFFS; + word_offset = ((mmTPC_PLL_CLK_RLX_0 & PROT_BITS_OFFS) >> 7) << 2; + mask = 1 << ((mmTPC_PLL_CLK_RLX_0 & 0x7C) >> 2); + + WREG32(pb_addr + word_offset, mask); goya_init_mme_protection_bits(hdev); @@ -2295,8 +2305,8 @@ void goya_init_security(struct hl_device *hdev) u32 lbw_rng10_base = 0xFCC60000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK; u32 lbw_rng10_mask = 0xFFFE0000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK; - u32 lbw_rng11_base = 0xFCE00000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK; - u32 lbw_rng11_mask = 0xFFFFC000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK; + u32 lbw_rng11_base = 0xFCE02000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK; + u32 lbw_rng11_mask = 0xFFFFE000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK; u32 lbw_rng12_base = 0xFE484000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK; u32 lbw_rng12_mask = 0xFFFFF000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK; From aa957088b4e846146b230e431dd9dad59e907f9a Mon Sep 17 00:00:00 2001 From: Dalit Ben Zoor Date: Sun, 24 Mar 2019 10:15:44 +0200 Subject: [PATCH 18/31] habanalabs: add device status option to INFO IOCTL This patch adds a new opcode to INFO IOCTL that returns the device status. This will allow users to query the device status in order to avoid sending command submissions while device is in reset. Signed-off-by: Dalit Ben Zoor Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/device.c | 15 +++++++++++++++ drivers/misc/habanalabs/habanalabs.h | 1 + drivers/misc/habanalabs/habanalabs_ioctl.c | 19 +++++++++++++++++++ include/uapi/misc/habanalabs.h | 12 ++++++++++++ 4 files changed, 47 insertions(+) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index c51d1062d0bc..e3797f582436 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -10,6 +10,7 @@ #include #include #include +#include #define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10) @@ -21,6 +22,20 @@ bool hl_device_disabled_or_in_reset(struct hl_device *hdev) return false; } +enum hl_device_status hl_device_status(struct hl_device *hdev) +{ + enum hl_device_status status; + + if (hdev->disabled) + status = HL_DEVICE_STATUS_MALFUNCTION; + else if (atomic_read(&hdev->in_reset)) + status = HL_DEVICE_STATUS_IN_RESET; + else + status = HL_DEVICE_STATUS_OPERATIONAL; + + return status; +}; + static void hpriv_release(struct kref *ref) { struct hl_fpriv *hpriv; diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index e9253d937bfc..fd7fcdf7050f 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -1272,6 +1272,7 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size, int hl_device_open(struct inode *inode, struct file *filp); bool hl_device_disabled_or_in_reset(struct hl_device *hdev); +enum hl_device_status hl_device_status(struct hl_device *hdev); int create_hdev(struct hl_device **dev, struct pci_dev *pdev, enum hl_asic_type asic_type, int minor); void destroy_hdev(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index 37f9de8e7404..9000ff615805 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -12,6 +12,21 @@ #include #include +static int device_status_info(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_device_status dev_stat = {0}; + u32 size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!size) || (!out)) + return -EINVAL; + + dev_stat.status = hl_device_status(hdev); + + return copy_to_user(out, &dev_stat, + min((size_t)size, sizeof(dev_stat))) ? -EFAULT : 0; +} + static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) { struct hl_info_hw_ip_info hw_ip = {0}; @@ -105,6 +120,10 @@ static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) struct hl_device *hdev = hpriv->hdev; int rc; + /* We want to return device status even if it disabled or in reset */ + if (args->op == HL_INFO_DEVICE_STATUS) + return device_status_info(hdev, args); + if (hl_device_disabled_or_in_reset(hdev)) { dev_warn_ratelimited(hdev->dev, "Device is disabled or in reset. Can't execute INFO IOCTL\n"); diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index ab1957e31077..993a79edad73 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -45,11 +45,18 @@ enum goya_queue_id { GOYA_QUEUE_ID_SIZE }; +enum hl_device_status { + HL_DEVICE_STATUS_OPERATIONAL, + HL_DEVICE_STATUS_IN_RESET, + HL_DEVICE_STATUS_MALFUNCTION +}; + /* Opcode for management ioctl */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 #define HL_INFO_DRAM_USAGE 2 #define HL_INFO_HW_IDLE 3 +#define HL_INFO_DEVICE_STATUS 4 #define HL_INFO_VERSION_MAX_LEN 128 @@ -82,6 +89,11 @@ struct hl_info_hw_idle { __u32 pad; }; +struct hl_info_device_status { + __u32 status; + __u32 pad; +}; + struct hl_info_args { /* Location of relevant struct in userspace */ __u64 return_pointer; From cab8e3e20d5cb89c876c30d24d38f1caf1d9bdb8 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 27 Mar 2019 09:44:28 +0200 Subject: [PATCH 19/31] habanalabs: improve error messages This patch improves two error messages to help the user to better understand what error occurred. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/command_submission.c | 3 ++- drivers/misc/habanalabs/memory.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index f908643f871f..02c48da0b645 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -261,7 +261,8 @@ static void cs_timedout(struct work_struct *work) ctx_asid = cs->ctx->asid; /* TODO: add information about last signaled seq and last emitted seq */ - dev_err(hdev->dev, "CS %d.%llu got stuck!\n", ctx_asid, cs->sequence); + dev_err(hdev->dev, "User %d command submission %llu got stuck!\n", + ctx_asid, cs->sequence); cs_put(cs); diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index ce1fda40a8b8..39788b1cf8d0 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -109,7 +109,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, page_size); if (!phys_pg_pack->pages[i]) { dev_err(hdev->dev, - "ioctl failed to allocate page\n"); + "Failed to allocate device memory (out of memory)\n"); rc = -ENOMEM; goto page_err; } From 9336c0216782d3a4cd108b584efe24b64cad8a63 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 31 Mar 2019 11:29:53 +0300 Subject: [PATCH 20/31] habanalabs: remove trailing blank line from EOF GIT does not like extra blank lines at the end of the file, so this patch removes those lines. Signed-off-by: Oded Gabbay Reviewed-by: Mukesh Ojha --- .../misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_masks.h | 1 - .../misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/cpu_if_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/cpu_pll_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_1_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_2_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_3_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_4_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_1_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_2_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_3_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_4_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/ic_pll_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mc_pll_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme2_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme3_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme4_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme5_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme6_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mme_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mmu_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/mmu_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/pcie_aux_regs.h | 1 - .../misc/habanalabs/include/goya/asic_reg/psoc_emmc_pll_regs.h | 1 - .../habanalabs/include/goya/asic_reg/psoc_global_conf_masks.h | 1 - .../habanalabs/include/goya/asic_reg/psoc_global_conf_regs.h | 1 - .../misc/habanalabs/include/goya/asic_reg/psoc_mme_pll_regs.h | 1 - .../misc/habanalabs/include/goya/asic_reg/psoc_pci_pll_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/psoc_spi_regs.h | 1 - .../misc/habanalabs/include/goya/asic_reg/sram_y0_x0_rtr_regs.h | 1 - .../misc/habanalabs/include/goya/asic_reg/sram_y0_x1_rtr_regs.h | 1 - .../misc/habanalabs/include/goya/asic_reg/sram_y0_x2_rtr_regs.h | 1 - .../misc/habanalabs/include/goya/asic_reg/sram_y0_x3_rtr_regs.h | 1 - .../misc/habanalabs/include/goya/asic_reg/sram_y0_x4_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/stlb_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/stlb_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_regs.h | 1 - .../misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_masks.h | 1 - .../misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_masks.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cfg_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cmdq_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc1_qm_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc1_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cfg_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cmdq_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc2_qm_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc2_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cfg_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cmdq_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc3_qm_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc3_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cfg_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cmdq_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc4_qm_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc4_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cfg_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cmdq_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc5_qm_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc5_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cfg_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cmdq_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc6_qm_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc6_rtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cfg_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cmdq_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc7_nrtr_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc7_qm_regs.h | 1 - drivers/misc/habanalabs/include/goya/asic_reg/tpc_pll_regs.h | 1 - 91 files changed, 91 deletions(-) diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_masks.h index 2cf5c46b6e8e..4e0dbbbbde20 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_masks.h @@ -188,4 +188,3 @@ #define CPU_CA53_CFG_ARM_PMU_EVENT_MASK 0x3FFFFFFF #endif /* ASIC_REG_CPU_CA53_CFG_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_regs.h index 840ccffa1081..f3faf1aad91a 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_regs.h @@ -58,4 +58,3 @@ #define mmCPU_CA53_CFG_ARM_PMU_1 0x441214 #endif /* ASIC_REG_CPU_CA53_CFG_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_if_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_if_regs.h index f23cb3e41c30..cf657918962a 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_if_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_if_regs.h @@ -46,4 +46,3 @@ #define mmCPU_IF_AXI_SPLIT_INTR 0x442130 #endif /* ASIC_REG_CPU_IF_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_pll_regs.h index 8fc97f838ada..8c8f9726d4b9 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_pll_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_pll_regs.h @@ -102,4 +102,3 @@ #define mmCPU_PLL_FREQ_CALC_EN 0x4A2440 #endif /* ASIC_REG_CPU_PLL_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_regs.h index 61c8cd9ce58b..0b246fe6ad04 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_regs.h @@ -206,4 +206,3 @@ #define mmDMA_CH_0_MEM_INIT_BUSY 0x4011FC #endif /* ASIC_REG_DMA_CH_0_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_1_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_1_regs.h index 92960ef5e308..5449031722f2 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_1_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_1_regs.h @@ -206,4 +206,3 @@ #define mmDMA_CH_1_MEM_INIT_BUSY 0x4091FC #endif /* ASIC_REG_DMA_CH_1_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_2_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_2_regs.h index 4e37871a51bb..a4768521d18a 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_2_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_2_regs.h @@ -206,4 +206,3 @@ #define mmDMA_CH_2_MEM_INIT_BUSY 0x4111FC #endif /* ASIC_REG_DMA_CH_2_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_3_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_3_regs.h index a2d6aeb32a18..619d01897ff8 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_3_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_3_regs.h @@ -206,4 +206,3 @@ #define mmDMA_CH_3_MEM_INIT_BUSY 0x4191FC #endif /* ASIC_REG_DMA_CH_3_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_4_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_4_regs.h index 400d6fd3acf5..038617e163f1 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_4_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_4_regs.h @@ -206,4 +206,3 @@ #define mmDMA_CH_4_MEM_INIT_BUSY 0x4211FC #endif /* ASIC_REG_DMA_CH_4_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_masks.h index 8d965443c51e..f43b564af1be 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_masks.h @@ -102,4 +102,3 @@ #define DMA_MACRO_RAZWI_HBW_RD_ID_R_MASK 0x1FFFFFFF #endif /* ASIC_REG_DMA_MACRO_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_regs.h index 8bfcb001189d..c3bfc1b8e3fd 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_regs.h @@ -178,4 +178,3 @@ #define mmDMA_MACRO_RAZWI_HBW_RD_ID 0x4B0158 #endif /* ASIC_REG_DMA_MACRO_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_masks.h index 9f33f351a3c1..bc977488c072 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_masks.h @@ -206,4 +206,3 @@ #define DMA_NRTR_NON_LIN_SCRAMB_EN_MASK 0x1 #endif /* ASIC_REG_DMA_NRTR_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_regs.h index d8293745a02b..c4abc7ff1fc6 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_regs.h @@ -224,4 +224,3 @@ #define mmDMA_NRTR_NON_LIN_SCRAMB 0x1C0604 #endif /* ASIC_REG_DMA_NRTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_masks.h index 10619dbb9b17..b17f72c31ab6 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_masks.h @@ -462,4 +462,3 @@ #define DMA_QM_0_CQ_BUF_RDATA_VAL_MASK 0xFFFFFFFF #endif /* ASIC_REG_DMA_QM_0_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_regs.h index c693bc5dcb22..bf360b301154 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_regs.h @@ -176,4 +176,3 @@ #define mmDMA_QM_0_CQ_BUF_RDATA 0x40030C #endif /* ASIC_REG_DMA_QM_0_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_1_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_1_regs.h index da928390f89c..51d432d05ac4 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_1_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_1_regs.h @@ -176,4 +176,3 @@ #define mmDMA_QM_1_CQ_BUF_RDATA 0x40830C #endif /* ASIC_REG_DMA_QM_1_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_2_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_2_regs.h index b4f06e9b71d6..18fc0c2b6cc2 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_2_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_2_regs.h @@ -176,4 +176,3 @@ #define mmDMA_QM_2_CQ_BUF_RDATA 0x41030C #endif /* ASIC_REG_DMA_QM_2_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_3_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_3_regs.h index 53e3cd78a06b..6cf7204bf5cc 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_3_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_3_regs.h @@ -176,4 +176,3 @@ #define mmDMA_QM_3_CQ_BUF_RDATA 0x41830C #endif /* ASIC_REG_DMA_QM_3_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_4_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_4_regs.h index e0eb5f260201..36fef2682875 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_4_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_4_regs.h @@ -176,4 +176,3 @@ #define mmDMA_QM_4_CQ_BUF_RDATA 0x42030C #endif /* ASIC_REG_DMA_QM_4_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/ic_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/ic_pll_regs.h index 0a743817aad7..4ae7fed8b18c 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/ic_pll_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/ic_pll_regs.h @@ -102,4 +102,3 @@ #define mmIC_PLL_FREQ_CALC_EN 0x4A3440 #endif /* ASIC_REG_IC_PLL_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mc_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mc_pll_regs.h index 4408188aa067..6d35d852798b 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mc_pll_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mc_pll_regs.h @@ -102,4 +102,3 @@ #define mmMC_PLL_FREQ_CALC_EN 0x4A1440 #endif /* ASIC_REG_MC_PLL_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_masks.h index 687bca5c5fe3..6c23f8b96e7e 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_masks.h @@ -650,4 +650,3 @@ #define MME1_RTR_NON_LIN_SCRAMB_EN_MASK 0x1 #endif /* ASIC_REG_MME1_RTR_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_regs.h index c248339a1cbe..122e9d529939 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_regs.h @@ -328,4 +328,3 @@ #define mmMME1_RTR_NON_LIN_SCRAMB 0x40604 #endif /* ASIC_REG_MME1_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme2_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme2_rtr_regs.h index 7a2b777bdc4f..00ce2252bbfb 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme2_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme2_rtr_regs.h @@ -328,4 +328,3 @@ #define mmMME2_RTR_NON_LIN_SCRAMB 0x80604 #endif /* ASIC_REG_MME2_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme3_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme3_rtr_regs.h index b78f8bc387fc..8e3eb7fd2070 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme3_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme3_rtr_regs.h @@ -328,4 +328,3 @@ #define mmMME3_RTR_NON_LIN_SCRAMB 0xC0604 #endif /* ASIC_REG_MME3_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme4_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme4_rtr_regs.h index d9a4a02cefa3..79b67bbc8567 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme4_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme4_rtr_regs.h @@ -328,4 +328,3 @@ #define mmMME4_RTR_NON_LIN_SCRAMB 0x100604 #endif /* ASIC_REG_MME4_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme5_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme5_rtr_regs.h index 205adc988407..0ac3c37ce47f 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme5_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme5_rtr_regs.h @@ -328,4 +328,3 @@ #define mmMME5_RTR_NON_LIN_SCRAMB 0x140604 #endif /* ASIC_REG_MME5_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme6_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme6_rtr_regs.h index fcec68388278..50c49cce72a6 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme6_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme6_rtr_regs.h @@ -328,4 +328,3 @@ #define mmMME6_RTR_NON_LIN_SCRAMB 0x180604 #endif /* ASIC_REG_MME6_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_masks.h index a0d4382fbbd0..fe7d95bdcef9 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_masks.h @@ -370,4 +370,3 @@ #define MME_CMDQ_CQ_BUF_RDATA_VAL_MASK 0xFFFFFFFF #endif /* ASIC_REG_MME_CMDQ_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_regs.h index 5c2f6b870a58..5f8b85d2b4b1 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_regs.h @@ -136,4 +136,3 @@ #define mmMME_CMDQ_CQ_BUF_RDATA 0xD930C #endif /* ASIC_REG_MME_CMDQ_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_masks.h index c7b1b0bb3384..1882c413cbe0 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_masks.h @@ -1534,4 +1534,3 @@ #define MME_SHADOW_3_E_BUBBLES_PER_SPLIT_ID_MASK 0xFF000000 #endif /* ASIC_REG_MME_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_masks.h index d4bfa58dce19..e464e381555c 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_masks.h @@ -462,4 +462,3 @@ #define MME_QM_CQ_BUF_RDATA_VAL_MASK 0xFFFFFFFF #endif /* ASIC_REG_MME_QM_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_regs.h index b5b1c776f6c3..538708beffc9 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_regs.h @@ -176,4 +176,3 @@ #define mmMME_QM_CQ_BUF_RDATA 0xD830C #endif /* ASIC_REG_MME_QM_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_regs.h index 9436b1e2705a..0396cbfd5c89 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_regs.h @@ -1150,4 +1150,3 @@ #define mmMME_SHADOW_3_E_BUBBLES_PER_SPLIT 0xD0BAC #endif /* ASIC_REG_MME_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mmu_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/mmu_masks.h index 3a78078d3c4c..c3e69062b135 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mmu_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mmu_masks.h @@ -140,4 +140,3 @@ #define MMU_ACCESS_ERROR_CAPTURE_VA_VA_31_0_MASK 0xFFFFFFFF #endif /* ASIC_REG_MMU_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mmu_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mmu_regs.h index bec6c014135c..7ec81f12031e 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/mmu_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/mmu_regs.h @@ -50,4 +50,3 @@ #define mmMMU_ACCESS_ERROR_CAPTURE_VA 0x480040 #endif /* ASIC_REG_MMU_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_masks.h index 209e41402a11..ceb59f2e28b3 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_masks.h @@ -206,4 +206,3 @@ #define PCI_NRTR_NON_LIN_SCRAMB_EN_MASK 0x1 #endif /* ASIC_REG_PCI_NRTR_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_regs.h index 447e5d4e7dc8..dd067f301ac2 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_regs.h @@ -224,4 +224,3 @@ #define mmPCI_NRTR_NON_LIN_SCRAMB 0x604 #endif /* ASIC_REG_PCI_NRTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/pcie_aux_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/pcie_aux_regs.h index daaf5d9079dc..35b1d8ac6f63 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/pcie_aux_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/pcie_aux_regs.h @@ -240,4 +240,3 @@ #define mmPCIE_AUX_PERST 0xC079B8 #endif /* ASIC_REG_PCIE_AUX_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_emmc_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_emmc_pll_regs.h index 8eda4de58788..9271ea95ebe9 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_emmc_pll_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_emmc_pll_regs.h @@ -102,4 +102,3 @@ #define mmPSOC_EMMC_PLL_FREQ_CALC_EN 0xC70440 #endif /* ASIC_REG_PSOC_EMMC_PLL_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_masks.h index d4bf0e1db4df..324266653c9a 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_masks.h @@ -444,4 +444,3 @@ #define PSOC_GLOBAL_CONF_PAD_SEL_VAL_MASK 0x3 #endif /* ASIC_REG_PSOC_GLOBAL_CONF_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_regs.h index cfbdd2c9c5c7..8141f422e712 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_regs.h @@ -742,4 +742,3 @@ #define mmPSOC_GLOBAL_CONF_PAD_SEL_81 0xC4BA44 #endif /* ASIC_REG_PSOC_GLOBAL_CONF_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_mme_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_mme_pll_regs.h index 6723d8f76f30..4789ebb9c337 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_mme_pll_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_mme_pll_regs.h @@ -102,4 +102,3 @@ #define mmPSOC_MME_PLL_FREQ_CALC_EN 0xC71440 #endif /* ASIC_REG_PSOC_MME_PLL_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_pci_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_pci_pll_regs.h index abcded0531c9..27a296ea6c3d 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_pci_pll_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_pci_pll_regs.h @@ -102,4 +102,3 @@ #define mmPSOC_PCI_PLL_FREQ_CALC_EN 0xC72440 #endif /* ASIC_REG_PSOC_PCI_PLL_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_spi_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_spi_regs.h index 5925c7477c25..66aee7fa6b1e 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_spi_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_spi_regs.h @@ -140,4 +140,3 @@ #define mmPSOC_SPI_RSVD_2 0xC430FC #endif /* ASIC_REG_PSOC_SPI_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x0_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x0_rtr_regs.h index d56c9fa0e7ba..2ea1770b078f 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x0_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x0_rtr_regs.h @@ -80,4 +80,3 @@ #define mmSRAM_Y0_X0_RTR_DBG_L_ARB_MAX 0x201330 #endif /* ASIC_REG_SRAM_Y0_X0_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x1_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x1_rtr_regs.h index 5624544303ca..37e0713efa73 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x1_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x1_rtr_regs.h @@ -80,4 +80,3 @@ #define mmSRAM_Y0_X1_RTR_DBG_L_ARB_MAX 0x205330 #endif /* ASIC_REG_SRAM_Y0_X1_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x2_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x2_rtr_regs.h index 3322bc0bd1df..d2572279a2b9 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x2_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x2_rtr_regs.h @@ -80,4 +80,3 @@ #define mmSRAM_Y0_X2_RTR_DBG_L_ARB_MAX 0x209330 #endif /* ASIC_REG_SRAM_Y0_X2_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x3_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x3_rtr_regs.h index 81e393db2027..68c5b402c506 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x3_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x3_rtr_regs.h @@ -80,4 +80,3 @@ #define mmSRAM_Y0_X3_RTR_DBG_L_ARB_MAX 0x20D330 #endif /* ASIC_REG_SRAM_Y0_X3_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x4_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x4_rtr_regs.h index b2e11b1de385..a42f1ba06d28 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x4_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x4_rtr_regs.h @@ -80,4 +80,3 @@ #define mmSRAM_Y0_X4_RTR_DBG_L_ARB_MAX 0x211330 #endif /* ASIC_REG_SRAM_Y0_X4_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/stlb_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/stlb_masks.h index b4ea8cae2757..94f2ed4a36bd 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/stlb_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/stlb_masks.h @@ -114,4 +114,3 @@ #define STLB_SRAM_INIT_BUSY_DATA_MASK 0x10 #endif /* ASIC_REG_STLB_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/stlb_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/stlb_regs.h index 0f5281d3e65b..35013f65acd2 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/stlb_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/stlb_regs.h @@ -52,4 +52,3 @@ #define mmSTLB_SRAM_INIT 0x49004C #endif /* ASIC_REG_STLB_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_masks.h index e5587b49eecd..89c9507a512f 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_masks.h @@ -1604,4 +1604,3 @@ #define TPC0_CFG_FUNC_MBIST_MEM_LAST_FAILED_PATTERN_MASK 0x70000000 #endif /* ASIC_REG_TPC0_CFG_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_regs.h index 2be28a63c50a..7d71c4b73a5e 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_regs.h @@ -884,4 +884,3 @@ #define mmTPC0_CFG_FUNC_MBIST_MEM_9 0xE06E2C #endif /* ASIC_REG_TPC0_CFG_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_masks.h index 9aa2d8b53207..9395f2458771 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_masks.h @@ -370,4 +370,3 @@ #define TPC0_CMDQ_CQ_BUF_RDATA_VAL_MASK 0xFFFFFFFF #endif /* ASIC_REG_TPC0_CMDQ_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_regs.h index 3572752ba66e..bc51df573bf0 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_regs.h @@ -136,4 +136,3 @@ #define mmTPC0_CMDQ_CQ_BUF_RDATA 0xE0930C #endif /* ASIC_REG_TPC0_CMDQ_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_masks.h index ed866d93c440..553c6b6bd5ec 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_masks.h @@ -344,4 +344,3 @@ #define TPC0_EML_CFG_DBG_INST_INSERT_CTL_INSERT_MASK 0x1 #endif /* ASIC_REG_TPC0_EML_CFG_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_regs.h index f1a1b4fa4841..8495479c3659 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_regs.h @@ -310,4 +310,3 @@ #define mmTPC0_EML_CFG_DBG_INST_INSERT_CTL 0x3040334 #endif /* ASIC_REG_TPC0_EML_CFG_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_masks.h index 7f86621179a5..43fafcf01041 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_masks.h @@ -206,4 +206,3 @@ #define TPC0_NRTR_NON_LIN_SCRAMB_EN_MASK 0x1 #endif /* ASIC_REG_TPC0_NRTR_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_regs.h index dc280f4e6608..ce3346dd2042 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_regs.h @@ -224,4 +224,3 @@ #define mmTPC0_NRTR_NON_LIN_SCRAMB 0xE00604 #endif /* ASIC_REG_TPC0_NRTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_masks.h index 80d97ee3d8d6..2e4b45947944 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_masks.h @@ -462,4 +462,3 @@ #define TPC0_QM_CQ_BUF_RDATA_VAL_MASK 0xFFFFFFFF #endif /* ASIC_REG_TPC0_QM_MASKS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_regs.h index 7552d4ba61fe..4fa09eb88878 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_regs.h @@ -176,4 +176,3 @@ #define mmTPC0_QM_CQ_BUF_RDATA 0xE0830C #endif /* ASIC_REG_TPC0_QM_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cfg_regs.h index 19894413474a..928eef1808ae 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cfg_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cfg_regs.h @@ -884,4 +884,3 @@ #define mmTPC1_CFG_FUNC_MBIST_MEM_9 0xE46E2C #endif /* ASIC_REG_TPC1_CFG_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cmdq_regs.h index 9099ebd7ab23..30ae0f307328 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cmdq_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cmdq_regs.h @@ -136,4 +136,3 @@ #define mmTPC1_CMDQ_CQ_BUF_RDATA 0xE4930C #endif /* ASIC_REG_TPC1_CMDQ_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_qm_regs.h index bc8b9a10391f..b95de4f95ba9 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_qm_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_qm_regs.h @@ -176,4 +176,3 @@ #define mmTPC1_QM_CQ_BUF_RDATA 0xE4830C #endif /* ASIC_REG_TPC1_QM_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_rtr_regs.h index ae267f8f457e..0f91e307879e 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_rtr_regs.h @@ -320,4 +320,3 @@ #define mmTPC1_RTR_NON_LIN_SCRAMB 0xE40604 #endif /* ASIC_REG_TPC1_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cfg_regs.h index 9c33fc039036..73421227f35b 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cfg_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cfg_regs.h @@ -884,4 +884,3 @@ #define mmTPC2_CFG_FUNC_MBIST_MEM_9 0xE86E2C #endif /* ASIC_REG_TPC2_CFG_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cmdq_regs.h index 7a643887d6e1..27b66bf2da9f 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cmdq_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cmdq_regs.h @@ -136,4 +136,3 @@ #define mmTPC2_CMDQ_CQ_BUF_RDATA 0xE8930C #endif /* ASIC_REG_TPC2_CMDQ_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_qm_regs.h index f3e32c018064..31e5b2f53905 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_qm_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_qm_regs.h @@ -176,4 +176,3 @@ #define mmTPC2_QM_CQ_BUF_RDATA 0xE8830C #endif /* ASIC_REG_TPC2_QM_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_rtr_regs.h index 0eb0cd1fbd19..4eddeaa15d94 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_rtr_regs.h @@ -320,4 +320,3 @@ #define mmTPC2_RTR_NON_LIN_SCRAMB 0xE80604 #endif /* ASIC_REG_TPC2_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cfg_regs.h index 0baf63c69b25..ce573a1a8361 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cfg_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cfg_regs.h @@ -884,4 +884,3 @@ #define mmTPC3_CFG_FUNC_MBIST_MEM_9 0xEC6E2C #endif /* ASIC_REG_TPC3_CFG_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cmdq_regs.h index 82a5261e852f..11d81fca0a0f 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cmdq_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cmdq_regs.h @@ -136,4 +136,3 @@ #define mmTPC3_CMDQ_CQ_BUF_RDATA 0xEC930C #endif /* ASIC_REG_TPC3_CMDQ_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_qm_regs.h index b05b1e18e664..e41595a19e69 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_qm_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_qm_regs.h @@ -176,4 +176,3 @@ #define mmTPC3_QM_CQ_BUF_RDATA 0xEC830C #endif /* ASIC_REG_TPC3_QM_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_rtr_regs.h index 5a2fd7652650..34a438b1efe5 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_rtr_regs.h @@ -320,4 +320,3 @@ #define mmTPC3_RTR_NON_LIN_SCRAMB 0xEC0604 #endif /* ASIC_REG_TPC3_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cfg_regs.h index d64a100075f2..d44caf0fc1bb 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cfg_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cfg_regs.h @@ -884,4 +884,3 @@ #define mmTPC4_CFG_FUNC_MBIST_MEM_9 0xF06E2C #endif /* ASIC_REG_TPC4_CFG_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cmdq_regs.h index 565b42885b0d..f13a6532961f 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cmdq_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cmdq_regs.h @@ -136,4 +136,3 @@ #define mmTPC4_CMDQ_CQ_BUF_RDATA 0xF0930C #endif /* ASIC_REG_TPC4_CMDQ_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_qm_regs.h index 196da3f12710..db081fc17cfc 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_qm_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_qm_regs.h @@ -176,4 +176,3 @@ #define mmTPC4_QM_CQ_BUF_RDATA 0xF0830C #endif /* ASIC_REG_TPC4_QM_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_rtr_regs.h index 8b54041d144a..8c5372303b28 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_rtr_regs.h @@ -320,4 +320,3 @@ #define mmTPC4_RTR_NON_LIN_SCRAMB 0xF00604 #endif /* ASIC_REG_TPC4_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cfg_regs.h index 3f00954fcdba..5139fde71011 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cfg_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cfg_regs.h @@ -884,4 +884,3 @@ #define mmTPC5_CFG_FUNC_MBIST_MEM_9 0xF46E2C #endif /* ASIC_REG_TPC5_CFG_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cmdq_regs.h index d8e72a8e18d7..1e7cd6e1e888 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cmdq_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cmdq_regs.h @@ -136,4 +136,3 @@ #define mmTPC5_CMDQ_CQ_BUF_RDATA 0xF4930C #endif /* ASIC_REG_TPC5_CMDQ_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_qm_regs.h index be2e68624709..ac0d3820cd6b 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_qm_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_qm_regs.h @@ -176,4 +176,3 @@ #define mmTPC5_QM_CQ_BUF_RDATA 0xF4830C #endif /* ASIC_REG_TPC5_QM_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_rtr_regs.h index 6f301c7bbc2f..57f83bc3b17d 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_rtr_regs.h @@ -320,4 +320,3 @@ #define mmTPC5_RTR_NON_LIN_SCRAMB 0xF40604 #endif /* ASIC_REG_TPC5_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cfg_regs.h index 1e1168601c41..94e0191c06c1 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cfg_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cfg_regs.h @@ -884,4 +884,3 @@ #define mmTPC6_CFG_FUNC_MBIST_MEM_9 0xF86E2C #endif /* ASIC_REG_TPC6_CFG_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cmdq_regs.h index fbca6b47284e..7a1a0e87b225 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cmdq_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cmdq_regs.h @@ -136,4 +136,3 @@ #define mmTPC6_CMDQ_CQ_BUF_RDATA 0xF8930C #endif /* ASIC_REG_TPC6_CMDQ_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_qm_regs.h index bf32465dabcb..80fa0fe0f60f 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_qm_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_qm_regs.h @@ -176,4 +176,3 @@ #define mmTPC6_QM_CQ_BUF_RDATA 0xF8830C #endif /* ASIC_REG_TPC6_QM_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_rtr_regs.h index 609bb90e1046..d6cae8b8af66 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_rtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_rtr_regs.h @@ -320,4 +320,3 @@ #define mmTPC6_RTR_NON_LIN_SCRAMB 0xF80604 #endif /* ASIC_REG_TPC6_RTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cfg_regs.h index bf2fd0f73906..234147adb779 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cfg_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cfg_regs.h @@ -884,4 +884,3 @@ #define mmTPC7_CFG_FUNC_MBIST_MEM_9 0xFC6E2C #endif /* ASIC_REG_TPC7_CFG_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cmdq_regs.h index 65d83043bf63..4c160632fe7d 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cmdq_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cmdq_regs.h @@ -136,4 +136,3 @@ #define mmTPC7_CMDQ_CQ_BUF_RDATA 0xFC930C #endif /* ASIC_REG_TPC7_CMDQ_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_nrtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_nrtr_regs.h index 3d5848d87304..0c13d4d167aa 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_nrtr_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_nrtr_regs.h @@ -224,4 +224,3 @@ #define mmTPC7_NRTR_NON_LIN_SCRAMB 0xFC0604 #endif /* ASIC_REG_TPC7_NRTR_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_qm_regs.h index 25f5095f68fb..cbe11425bfb0 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_qm_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_qm_regs.h @@ -176,4 +176,3 @@ #define mmTPC7_QM_CQ_BUF_RDATA 0xFC830C #endif /* ASIC_REG_TPC7_QM_REGS_H_ */ - diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc_pll_regs.h index 920231d0afa5..e25e19660a9d 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc_pll_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc_pll_regs.h @@ -102,4 +102,3 @@ #define mmTPC_PLL_FREQ_CALC_EN 0xE01440 #endif /* ASIC_REG_TPC_PLL_REGS_H_ */ - From e850b89f50d2c1439f58d547b888ee6e43312dea Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 31 Mar 2019 21:37:42 +0300 Subject: [PATCH 21/31] habanalabs: prevent CPU soft lockup on Palladium Unmapping ptes in the device MMU on Palladium can take a long time, which can cause a kernel BUG of CPU soft lockup. This patch minimize the chances for this bug by sleeping a little between unmapping ptes. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/memory.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 39788b1cf8d0..e9e163545584 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -1046,10 +1046,17 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) mutex_lock(&ctx->mmu_lock); - for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) + for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { if (hl_mmu_unmap(ctx, next_vaddr, page_size)) dev_warn_ratelimited(hdev->dev, - "unmap failed for vaddr: 0x%llx\n", next_vaddr); + "unmap failed for vaddr: 0x%llx\n", next_vaddr); + + /* unmapping on Palladium can be really long, so avoid a CPU + * soft lockup bug by sleeping a little between unmapping pages + */ + if (hdev->pldm) + usleep_range(500, 1000); + } hdev->asic_funcs->mmu_invalidate_cache(hdev, true); From a1c92d1c2a67d7b61dd5ed2d3bce810269613d37 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 2 Apr 2019 15:46:02 +0300 Subject: [PATCH 22/31] habanalabs: remove extra semicolon This patch removes an extra ; after the closing brackets of a while loop. Signed-off-by: Oded Gabbay Reviewed-by: Mukesh Ojha --- drivers/misc/habanalabs/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index e3797f582436..6cbfd560721e 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -1044,7 +1044,7 @@ void hl_device_fini(struct hl_device *hdev) WARN(1, "Failed to remove device because reset function did not finish\n"); return; } - }; + } /* Mark device as disabled */ hdev->disabled = true; From 315bc055ed5667232859b17adb837f87c1629f81 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Mon, 1 Apr 2019 22:31:22 +0300 Subject: [PATCH 23/31] habanalabs: add new IOCTL for debug, tracing and profiling Habanalabs ASICs use the ARM coresight infrastructure to support debug, tracing and profiling of neural networks topologies. Because the coresight is configured using register writes and reads, and some of the registers hold sensitive information (e.g. the address in the device's DRAM where the trace data is written to), the user must go through the kernel driver to configure this mechanism. This patch implements the common code of the IOCTL and calls the ASIC-specific function for the actual H/W configuration. The IOCTL supports configuration of seven coresight components: ETR, ETF, STM, FUNNEL, BMON, SPMU and TIMESTAMP The user specifies which component he wishes to configure and provides a pointer to a structure (located in its process space) that contains the relevant configuration. The common code copies the relevant data from the user-space to kernel space and then calls the ASIC-specific function to do the H/W configuration. After the configuration is done, which is usually composed of several IOCTL calls depending on what the user wanted to trace, the user can start executing the topology. The trace data will be written to the user's area in the device's DRAM. After the tracing operation is complete, and user will call the IOCTL again to disable the tracing operation. The user also need to read values from registers for some of the components (e.g. the size of the trace data in the device's DRAM). In that case, the user will provide a pointer to an "output" structure in user-space, which the IOCTL code will fill according the to selected component. Signed-off-by: Omer Shpigelman Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/Makefile | 3 +- drivers/misc/habanalabs/goya/goya.c | 1 + drivers/misc/habanalabs/goya/goyaP.h | 1 + drivers/misc/habanalabs/goya/goya_coresight.c | 13 ++ drivers/misc/habanalabs/habanalabs.h | 25 ++++ drivers/misc/habanalabs/habanalabs_ioctl.c | 113 ++++++++++++++++- include/uapi/misc/habanalabs.h | 116 +++++++++++++++++- 7 files changed, 269 insertions(+), 3 deletions(-) create mode 100644 drivers/misc/habanalabs/goya/goya_coresight.c diff --git a/drivers/misc/habanalabs/goya/Makefile b/drivers/misc/habanalabs/goya/Makefile index e458e5ba500b..131432f677e2 100644 --- a/drivers/misc/habanalabs/goya/Makefile +++ b/drivers/misc/habanalabs/goya/Makefile @@ -1,3 +1,4 @@ subdir-ccflags-y += -I$(src) -HL_GOYA_FILES := goya/goya.o goya/goya_security.o goya/goya_hwmgr.o +HL_GOYA_FILES := goya/goya.o goya/goya_security.o goya/goya_hwmgr.o \ + goya/goya_coresight.o diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 9f775dd81a52..3af883ada479 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4815,6 +4815,7 @@ static const struct hl_asic_funcs goya_funcs = { .send_heartbeat = goya_send_heartbeat, .enable_clock_gating = goya_init_clock_gating, .disable_clock_gating = goya_disable_clock_gating, + .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, .soft_reset_late_init = goya_soft_reset_late_init, .hw_queues_lock = goya_hw_queues_lock, diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index b99d92f197eb..6f1f7715075d 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -175,6 +175,7 @@ void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp); int goya_armcp_info_get(struct hl_device *hdev); void goya_init_security(struct hl_device *hdev); +int goya_debug_coresight(struct hl_device *hdev, void *data); u64 goya_get_max_power(struct hl_device *hdev); void goya_set_max_power(struct hl_device *hdev, u64 value); diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c new file mode 100644 index 000000000000..8957b8b1c0d6 --- /dev/null +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "goyaP.h" + +int goya_debug_coresight(struct hl_device *hdev, void *data) +{ + return -ENOTTY; +} diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index fd7fcdf7050f..18a03657780a 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -483,6 +483,7 @@ enum hl_pll_frequency { * @send_heartbeat: send is-alive packet to ArmCP and verify response. * @enable_clock_gating: enable clock gating for reducing power consumption. * @disable_clock_gating: disable clock for accessing registers on HBW. + * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. * @soft_reset_late_init: perform certain actions needed after soft reset. * @hw_queues_lock: acquire H/W queues lock. @@ -557,6 +558,7 @@ struct hl_asic_funcs { int (*send_heartbeat)(struct hl_device *hdev); void (*enable_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev); + int (*debug_coresight)(struct hl_device *hdev, void *data); bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size); int (*soft_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); @@ -867,6 +869,29 @@ struct hl_vm { u8 init_done; }; + +/* + * DEBUG, PROFILING STRUCTURE + */ + +/** + * struct hl_debug_params - Coresight debug parameters. + * @input: pointer to component specific input parameters. + * @output: pointer to component specific output parameters. + * @output_size: size of output buffer. + * @reg_idx: relevant register ID. + * @op: component operation to execute. + * @enable: true if to enable component debugging, false otherwise. + */ +struct hl_debug_params { + void *input; + void *output; + u32 output_size; + u32 reg_idx; + u32 op; + bool enable; +}; + /* * FILE PRIVATE STRUCTURE */ diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index 9000ff615805..810e65446075 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -12,6 +12,17 @@ #include #include +static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = { + [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr), + [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf), + [HL_DEBUG_OP_STM] = sizeof(struct hl_debug_params_stm), + [HL_DEBUG_OP_FUNNEL] = 0, + [HL_DEBUG_OP_BMON] = sizeof(struct hl_debug_params_bmon), + [HL_DEBUG_OP_SPMU] = sizeof(struct hl_debug_params_spmu), + [HL_DEBUG_OP_TIMESTAMP] = 0 + +}; + static int device_status_info(struct hl_device *hdev, struct hl_info_args *args) { struct hl_info_device_status dev_stat = {0}; @@ -114,6 +125,71 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; } +static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args) +{ + struct hl_debug_params *params; + void *input = NULL, *output = NULL; + int rc; + + params = kzalloc(sizeof(*params), GFP_KERNEL); + if (!params) + return -ENOMEM; + + params->reg_idx = args->reg_idx; + params->enable = args->enable; + params->op = args->op; + + if (args->input_ptr && args->input_size) { + input = memdup_user((const void __user *) args->input_ptr, + args->input_size); + if (IS_ERR(input)) { + rc = PTR_ERR(input); + input = NULL; + dev_err(hdev->dev, + "error %d when copying input debug data\n", rc); + goto out; + } + + params->input = input; + } + + if (args->output_ptr && args->output_size) { + output = kzalloc(args->output_size, GFP_KERNEL); + if (!output) { + rc = -ENOMEM; + goto out; + } + + params->output = output; + params->output_size = args->output_size; + } + + rc = hdev->asic_funcs->debug_coresight(hdev, params); + if (rc) { + dev_err(hdev->dev, + "debug coresight operation failed %d\n", rc); + goto out; + } + + if (output) { + if (copy_to_user((void __user *) (uintptr_t) args->output_ptr, + output, + args->output_size)) { + dev_err(hdev->dev, + "copy to user failed in debug ioctl\n"); + rc = -EFAULT; + goto out; + } + } + +out: + kfree(params); + kfree(output); + kfree(input); + + return rc; +} + static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) { struct hl_info_args *args = data; @@ -156,6 +232,40 @@ static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) return rc; } +static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) +{ + struct hl_debug_args *args = data; + struct hl_device *hdev = hpriv->hdev; + int rc = 0; + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(hdev->dev, + "Device is %s. Can't execute DEBUG IOCTL\n", + atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); + return -EBUSY; + } + + switch (args->op) { + case HL_DEBUG_OP_ETR: + case HL_DEBUG_OP_ETF: + case HL_DEBUG_OP_STM: + case HL_DEBUG_OP_FUNNEL: + case HL_DEBUG_OP_BMON: + case HL_DEBUG_OP_SPMU: + case HL_DEBUG_OP_TIMESTAMP: + args->input_size = + min(args->input_size, hl_debug_struct_size[args->op]); + rc = debug_coresight(hdev, args); + break; + default: + dev_err(hdev->dev, "Invalid request %d\n", args->op); + rc = -ENOTTY; + break; + } + + return rc; +} + #define HL_IOCTL_DEF(ioctl, _func) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func} @@ -164,7 +274,8 @@ static const struct hl_ioctl_desc hl_ioctls[] = { HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl), HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl), HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl), - HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl) + HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl), + HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl) }; #define HL_CORE_IOCTL_COUNT ARRAY_SIZE(hl_ioctls) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 993a79edad73..7704fe08c3ad 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -335,6 +335,107 @@ union hl_mem_args { struct hl_mem_out out; }; +#define HL_DEBUG_MAX_AUX_VALUES 10 + +struct hl_debug_params_etr { + /* Address in memory to allocate buffer */ + __u64 buffer_address; + + /* Size of buffer to allocate */ + __u64 buffer_size; + + /* Sink operation mode: SW fifo, HW fifo, Circular buffer */ + __u32 sink_mode; + __u32 pad; +}; + +struct hl_debug_params_etf { + /* Address in memory to allocate buffer */ + __u64 buffer_address; + + /* Size of buffer to allocate */ + __u64 buffer_size; + + /* Sink operation mode: SW fifo, HW fifo, Circular buffer */ + __u32 sink_mode; + __u32 pad; +}; + +struct hl_debug_params_stm { + /* Two bit masks for HW event and Stimulus Port */ + __u64 he_mask; + __u64 sp_mask; + + /* Trace source ID */ + __u32 id; + + /* Frequency for the timestamp register */ + __u32 frequency; +}; + +struct hl_debug_params_bmon { + /* Transaction address filter */ + __u64 addr_range0; + __u64 addr_range1; + + /* Capture window configuration */ + __u32 bw_win; + __u32 win_capture; + + /* Trace source ID */ + __u32 id; + __u32 pad; +}; + +struct hl_debug_params_spmu { + /* Event types selection */ + __u64 event_types[HL_DEBUG_MAX_AUX_VALUES]; + + /* Number of event types selection */ + __u32 event_types_num; + __u32 pad; +}; + +/* Opcode for ETR component */ +#define HL_DEBUG_OP_ETR 0 +/* Opcode for ETF component */ +#define HL_DEBUG_OP_ETF 1 +/* Opcode for STM component */ +#define HL_DEBUG_OP_STM 2 +/* Opcode for FUNNEL component */ +#define HL_DEBUG_OP_FUNNEL 3 +/* Opcode for BMON component */ +#define HL_DEBUG_OP_BMON 4 +/* Opcode for SPMU component */ +#define HL_DEBUG_OP_SPMU 5 +/* Opcode for timestamp */ +#define HL_DEBUG_OP_TIMESTAMP 6 + +struct hl_debug_args { + /* + * Pointer to user input structure. + * This field is relevant to specific opcodes. + */ + __u64 input_ptr; + /* Pointer to user output structure */ + __u64 output_ptr; + /* Size of user input structure */ + __u32 input_size; + /* Size of user output structure */ + __u32 output_size; + /* HL_DEBUG_OP_* */ + __u32 op; + /* + * Register index in the component, taken from the debug_regs_index enum + * in the various ASIC header files + */ + __u32 reg_idx; + /* Enable/disable */ + __u32 enable; + /* Context ID - Currently not in use */ + __u32 ctx_id; +}; + /* * Various information operations such as: * - H/W IP information @@ -459,7 +560,20 @@ union hl_mem_args { #define HL_IOCTL_MEMORY \ _IOWR('H', 0x05, union hl_mem_args) +/* + * Debug + * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces + * + * This IOCTL allows the user to get debug traces from the chip. + * + * The user needs to provide the register index and essential data such as + * buffer address and size. + * + */ +#define HL_IOCTL_DEBUG \ + _IOWR('H', 0x06, struct hl_debug_args) + #define HL_COMMAND_START 0x01 -#define HL_COMMAND_END 0x06 +#define HL_COMMAND_END 0x07 #endif /* HABANALABS_H_ */ From 8ba2876ddf935b845340571e2d197347b428879e Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Mon, 1 Apr 2019 22:23:02 +0300 Subject: [PATCH 24/31] habanalabs: add goya implementation for debug configuration This patch adds the ASIC-specific function for GOYA to configure the coresight components. Most of the components have an enabled/disabled flag, depending on whether the user wants to enable the component or disable it. For some of the components, such as ETR and SPMU, the user can also request to read values from them. Those values are needed for the user to parse the trace data. The ETR configuration is also checked for security purposes, to make sure the trace data is written to the device's DRAM. Signed-off-by: Omer Shpigelman Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 9 +- drivers/misc/habanalabs/goya/goyaP.h | 2 + drivers/misc/habanalabs/goya/goya_coresight.c | 609 +++++++++++++++++- .../include/goya/asic_reg/goya_regs.h | 3 +- .../include/goya/asic_reg/pcie_wrap_regs.h | 306 +++++++++ .../habanalabs/include/goya/goya_coresight.h | 199 ++++++ 6 files changed, 1122 insertions(+), 6 deletions(-) create mode 100644 drivers/misc/habanalabs/include/goya/asic_reg/pcie_wrap_regs.h create mode 100644 drivers/misc/habanalabs/include/goya/goya_coresight.h diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3af883ada479..c6b9d4aabef6 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -69,7 +69,7 @@ * */ -#define GOYA_MMU_REGS_NUM 61 +#define GOYA_MMU_REGS_NUM 63 #define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ @@ -85,8 +85,6 @@ #define GOYA_QMAN0_FENCE_VAL 0xD169B243 -#define GOYA_MAX_INITIATORS 20 - #define GOYA_MAX_STRING_LEN 20 #define GOYA_CB_POOL_CB_CNT 512 @@ -171,7 +169,9 @@ static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = { mmMME_SBA_CONTROL_DATA, mmMME_SBB_CONTROL_DATA, mmMME_SBC_CONTROL_DATA, - mmMME_WBC_CONTROL_DATA + mmMME_WBC_CONTROL_DATA, + mmPCIE_WRAP_PSOC_ARUSER, + mmPCIE_WRAP_PSOC_AWUSER }; static u32 goya_all_events[] = { @@ -1436,6 +1436,7 @@ static void goya_init_golden_registers(struct hl_device *hdev) */ WREG32(mmDMA_CH_0_CFG0, 0x0fff0010); WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0); + WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020); goya->hw_cap_initialized |= HW_CAP_GOLDEN; diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 6f1f7715075d..2a6cdca744dd 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -43,6 +43,8 @@ #define QMAN_STOP_TIMEOUT_USEC 100000 /* 100 ms */ +#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */ + #define TPC_ENABLED_MASK 0xFF #define PLL_HIGH_DEFAULT 1575000000 /* 1.575 GHz */ diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index 8957b8b1c0d6..68726fb4c56a 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -6,8 +6,615 @@ */ #include "goyaP.h" +#include "include/goya/goya_coresight.h" +#include "include/goya/asic_reg/goya_regs.h" + +#include + +#include + +#define GOYA_PLDM_CORESIGHT_TIMEOUT_USEC (CORESIGHT_TIMEOUT_USEC * 100) + +static u64 debug_stm_regs[GOYA_STM_LAST + 1] = { + [GOYA_STM_CPU] = mmCPU_STM_BASE, + [GOYA_STM_DMA_CH_0_CS] = mmDMA_CH_0_CS_STM_BASE, + [GOYA_STM_DMA_CH_1_CS] = mmDMA_CH_1_CS_STM_BASE, + [GOYA_STM_DMA_CH_2_CS] = mmDMA_CH_2_CS_STM_BASE, + [GOYA_STM_DMA_CH_3_CS] = mmDMA_CH_3_CS_STM_BASE, + [GOYA_STM_DMA_CH_4_CS] = mmDMA_CH_4_CS_STM_BASE, + [GOYA_STM_DMA_MACRO_CS] = mmDMA_MACRO_CS_STM_BASE, + [GOYA_STM_MME1_SBA] = mmMME1_SBA_STM_BASE, + [GOYA_STM_MME3_SBB] = mmMME3_SBB_STM_BASE, + [GOYA_STM_MME4_WACS2] = mmMME4_WACS2_STM_BASE, + [GOYA_STM_MME4_WACS] = mmMME4_WACS_STM_BASE, + [GOYA_STM_MMU_CS] = mmMMU_CS_STM_BASE, + [GOYA_STM_PCIE] = mmPCIE_STM_BASE, + [GOYA_STM_PSOC] = mmPSOC_STM_BASE, + [GOYA_STM_TPC0_EML] = mmTPC0_EML_STM_BASE, + [GOYA_STM_TPC1_EML] = mmTPC1_EML_STM_BASE, + [GOYA_STM_TPC2_EML] = mmTPC2_EML_STM_BASE, + [GOYA_STM_TPC3_EML] = mmTPC3_EML_STM_BASE, + [GOYA_STM_TPC4_EML] = mmTPC4_EML_STM_BASE, + [GOYA_STM_TPC5_EML] = mmTPC5_EML_STM_BASE, + [GOYA_STM_TPC6_EML] = mmTPC6_EML_STM_BASE, + [GOYA_STM_TPC7_EML] = mmTPC7_EML_STM_BASE +}; + +static u64 debug_etf_regs[GOYA_ETF_LAST + 1] = { + [GOYA_ETF_CPU_0] = mmCPU_ETF_0_BASE, + [GOYA_ETF_CPU_1] = mmCPU_ETF_1_BASE, + [GOYA_ETF_CPU_TRACE] = mmCPU_ETF_TRACE_BASE, + [GOYA_ETF_DMA_CH_0_CS] = mmDMA_CH_0_CS_ETF_BASE, + [GOYA_ETF_DMA_CH_1_CS] = mmDMA_CH_1_CS_ETF_BASE, + [GOYA_ETF_DMA_CH_2_CS] = mmDMA_CH_2_CS_ETF_BASE, + [GOYA_ETF_DMA_CH_3_CS] = mmDMA_CH_3_CS_ETF_BASE, + [GOYA_ETF_DMA_CH_4_CS] = mmDMA_CH_4_CS_ETF_BASE, + [GOYA_ETF_DMA_MACRO_CS] = mmDMA_MACRO_CS_ETF_BASE, + [GOYA_ETF_MME1_SBA] = mmMME1_SBA_ETF_BASE, + [GOYA_ETF_MME3_SBB] = mmMME3_SBB_ETF_BASE, + [GOYA_ETF_MME4_WACS2] = mmMME4_WACS2_ETF_BASE, + [GOYA_ETF_MME4_WACS] = mmMME4_WACS_ETF_BASE, + [GOYA_ETF_MMU_CS] = mmMMU_CS_ETF_BASE, + [GOYA_ETF_PCIE] = mmPCIE_ETF_BASE, + [GOYA_ETF_PSOC] = mmPSOC_ETF_BASE, + [GOYA_ETF_TPC0_EML] = mmTPC0_EML_ETF_BASE, + [GOYA_ETF_TPC1_EML] = mmTPC1_EML_ETF_BASE, + [GOYA_ETF_TPC2_EML] = mmTPC2_EML_ETF_BASE, + [GOYA_ETF_TPC3_EML] = mmTPC3_EML_ETF_BASE, + [GOYA_ETF_TPC4_EML] = mmTPC4_EML_ETF_BASE, + [GOYA_ETF_TPC5_EML] = mmTPC5_EML_ETF_BASE, + [GOYA_ETF_TPC6_EML] = mmTPC6_EML_ETF_BASE, + [GOYA_ETF_TPC7_EML] = mmTPC7_EML_ETF_BASE +}; + +static u64 debug_funnel_regs[GOYA_FUNNEL_LAST + 1] = { + [GOYA_FUNNEL_CPU] = mmCPU_FUNNEL_BASE, + [GOYA_FUNNEL_DMA_CH_6_1] = mmDMA_CH_FUNNEL_6_1_BASE, + [GOYA_FUNNEL_DMA_MACRO_3_1] = mmDMA_MACRO_FUNNEL_3_1_BASE, + [GOYA_FUNNEL_MME0_RTR] = mmMME0_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_MME1_RTR] = mmMME1_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_MME2_RTR] = mmMME2_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_MME3_RTR] = mmMME3_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_MME4_RTR] = mmMME4_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_MME5_RTR] = mmMME5_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_PCIE] = mmPCIE_FUNNEL_BASE, + [GOYA_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE, + [GOYA_FUNNEL_TPC0_EML] = mmTPC0_EML_FUNNEL_BASE, + [GOYA_FUNNEL_TPC1_EML] = mmTPC1_EML_FUNNEL_BASE, + [GOYA_FUNNEL_TPC1_RTR] = mmTPC1_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_TPC2_EML] = mmTPC2_EML_FUNNEL_BASE, + [GOYA_FUNNEL_TPC2_RTR] = mmTPC2_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_TPC3_EML] = mmTPC3_EML_FUNNEL_BASE, + [GOYA_FUNNEL_TPC3_RTR] = mmTPC3_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_TPC4_EML] = mmTPC4_EML_FUNNEL_BASE, + [GOYA_FUNNEL_TPC4_RTR] = mmTPC4_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_TPC5_EML] = mmTPC5_EML_FUNNEL_BASE, + [GOYA_FUNNEL_TPC5_RTR] = mmTPC5_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_TPC6_EML] = mmTPC6_EML_FUNNEL_BASE, + [GOYA_FUNNEL_TPC6_RTR] = mmTPC6_RTR_FUNNEL_BASE, + [GOYA_FUNNEL_TPC7_EML] = mmTPC7_EML_FUNNEL_BASE +}; + +static u64 debug_bmon_regs[GOYA_BMON_LAST + 1] = { + [GOYA_BMON_CPU_RD] = mmCPU_RD_BMON_BASE, + [GOYA_BMON_CPU_WR] = mmCPU_WR_BMON_BASE, + [GOYA_BMON_DMA_CH_0_0] = mmDMA_CH_0_BMON_0_BASE, + [GOYA_BMON_DMA_CH_0_1] = mmDMA_CH_0_BMON_1_BASE, + [GOYA_BMON_DMA_CH_1_0] = mmDMA_CH_1_BMON_0_BASE, + [GOYA_BMON_DMA_CH_1_1] = mmDMA_CH_1_BMON_1_BASE, + [GOYA_BMON_DMA_CH_2_0] = mmDMA_CH_2_BMON_0_BASE, + [GOYA_BMON_DMA_CH_2_1] = mmDMA_CH_2_BMON_1_BASE, + [GOYA_BMON_DMA_CH_3_0] = mmDMA_CH_3_BMON_0_BASE, + [GOYA_BMON_DMA_CH_3_1] = mmDMA_CH_3_BMON_1_BASE, + [GOYA_BMON_DMA_CH_4_0] = mmDMA_CH_4_BMON_0_BASE, + [GOYA_BMON_DMA_CH_4_1] = mmDMA_CH_4_BMON_1_BASE, + [GOYA_BMON_DMA_MACRO_0] = mmDMA_MACRO_BMON_0_BASE, + [GOYA_BMON_DMA_MACRO_1] = mmDMA_MACRO_BMON_1_BASE, + [GOYA_BMON_DMA_MACRO_2] = mmDMA_MACRO_BMON_2_BASE, + [GOYA_BMON_DMA_MACRO_3] = mmDMA_MACRO_BMON_3_BASE, + [GOYA_BMON_DMA_MACRO_4] = mmDMA_MACRO_BMON_4_BASE, + [GOYA_BMON_DMA_MACRO_5] = mmDMA_MACRO_BMON_5_BASE, + [GOYA_BMON_DMA_MACRO_6] = mmDMA_MACRO_BMON_6_BASE, + [GOYA_BMON_DMA_MACRO_7] = mmDMA_MACRO_BMON_7_BASE, + [GOYA_BMON_MME1_SBA_0] = mmMME1_SBA_BMON0_BASE, + [GOYA_BMON_MME1_SBA_1] = mmMME1_SBA_BMON1_BASE, + [GOYA_BMON_MME3_SBB_0] = mmMME3_SBB_BMON0_BASE, + [GOYA_BMON_MME3_SBB_1] = mmMME3_SBB_BMON1_BASE, + [GOYA_BMON_MME4_WACS2_0] = mmMME4_WACS2_BMON0_BASE, + [GOYA_BMON_MME4_WACS2_1] = mmMME4_WACS2_BMON1_BASE, + [GOYA_BMON_MME4_WACS2_2] = mmMME4_WACS2_BMON2_BASE, + [GOYA_BMON_MME4_WACS_0] = mmMME4_WACS_BMON0_BASE, + [GOYA_BMON_MME4_WACS_1] = mmMME4_WACS_BMON1_BASE, + [GOYA_BMON_MME4_WACS_2] = mmMME4_WACS_BMON2_BASE, + [GOYA_BMON_MME4_WACS_3] = mmMME4_WACS_BMON3_BASE, + [GOYA_BMON_MME4_WACS_4] = mmMME4_WACS_BMON4_BASE, + [GOYA_BMON_MME4_WACS_5] = mmMME4_WACS_BMON5_BASE, + [GOYA_BMON_MME4_WACS_6] = mmMME4_WACS_BMON6_BASE, + [GOYA_BMON_MMU_0] = mmMMU_BMON_0_BASE, + [GOYA_BMON_MMU_1] = mmMMU_BMON_1_BASE, + [GOYA_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE, + [GOYA_BMON_PCIE_MSTR_WR] = mmPCIE_BMON_MSTR_WR_BASE, + [GOYA_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE, + [GOYA_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE, + [GOYA_BMON_TPC0_EML_0] = mmTPC0_EML_BUSMON_0_BASE, + [GOYA_BMON_TPC0_EML_1] = mmTPC0_EML_BUSMON_1_BASE, + [GOYA_BMON_TPC0_EML_2] = mmTPC0_EML_BUSMON_2_BASE, + [GOYA_BMON_TPC0_EML_3] = mmTPC0_EML_BUSMON_3_BASE, + [GOYA_BMON_TPC1_EML_0] = mmTPC1_EML_BUSMON_0_BASE, + [GOYA_BMON_TPC1_EML_1] = mmTPC1_EML_BUSMON_1_BASE, + [GOYA_BMON_TPC1_EML_2] = mmTPC1_EML_BUSMON_2_BASE, + [GOYA_BMON_TPC1_EML_3] = mmTPC1_EML_BUSMON_3_BASE, + [GOYA_BMON_TPC2_EML_0] = mmTPC2_EML_BUSMON_0_BASE, + [GOYA_BMON_TPC2_EML_1] = mmTPC2_EML_BUSMON_1_BASE, + [GOYA_BMON_TPC2_EML_2] = mmTPC2_EML_BUSMON_2_BASE, + [GOYA_BMON_TPC2_EML_3] = mmTPC2_EML_BUSMON_3_BASE, + [GOYA_BMON_TPC3_EML_0] = mmTPC3_EML_BUSMON_0_BASE, + [GOYA_BMON_TPC3_EML_1] = mmTPC3_EML_BUSMON_1_BASE, + [GOYA_BMON_TPC3_EML_2] = mmTPC3_EML_BUSMON_2_BASE, + [GOYA_BMON_TPC3_EML_3] = mmTPC3_EML_BUSMON_3_BASE, + [GOYA_BMON_TPC4_EML_0] = mmTPC4_EML_BUSMON_0_BASE, + [GOYA_BMON_TPC4_EML_1] = mmTPC4_EML_BUSMON_1_BASE, + [GOYA_BMON_TPC4_EML_2] = mmTPC4_EML_BUSMON_2_BASE, + [GOYA_BMON_TPC4_EML_3] = mmTPC4_EML_BUSMON_3_BASE, + [GOYA_BMON_TPC5_EML_0] = mmTPC5_EML_BUSMON_0_BASE, + [GOYA_BMON_TPC5_EML_1] = mmTPC5_EML_BUSMON_1_BASE, + [GOYA_BMON_TPC5_EML_2] = mmTPC5_EML_BUSMON_2_BASE, + [GOYA_BMON_TPC5_EML_3] = mmTPC5_EML_BUSMON_3_BASE, + [GOYA_BMON_TPC6_EML_0] = mmTPC6_EML_BUSMON_0_BASE, + [GOYA_BMON_TPC6_EML_1] = mmTPC6_EML_BUSMON_1_BASE, + [GOYA_BMON_TPC6_EML_2] = mmTPC6_EML_BUSMON_2_BASE, + [GOYA_BMON_TPC6_EML_3] = mmTPC6_EML_BUSMON_3_BASE, + [GOYA_BMON_TPC7_EML_0] = mmTPC7_EML_BUSMON_0_BASE, + [GOYA_BMON_TPC7_EML_1] = mmTPC7_EML_BUSMON_1_BASE, + [GOYA_BMON_TPC7_EML_2] = mmTPC7_EML_BUSMON_2_BASE, + [GOYA_BMON_TPC7_EML_3] = mmTPC7_EML_BUSMON_3_BASE +}; + +static u64 debug_spmu_regs[GOYA_SPMU_LAST + 1] = { + [GOYA_SPMU_DMA_CH_0_CS] = mmDMA_CH_0_CS_SPMU_BASE, + [GOYA_SPMU_DMA_CH_1_CS] = mmDMA_CH_1_CS_SPMU_BASE, + [GOYA_SPMU_DMA_CH_2_CS] = mmDMA_CH_2_CS_SPMU_BASE, + [GOYA_SPMU_DMA_CH_3_CS] = mmDMA_CH_3_CS_SPMU_BASE, + [GOYA_SPMU_DMA_CH_4_CS] = mmDMA_CH_4_CS_SPMU_BASE, + [GOYA_SPMU_DMA_MACRO_CS] = mmDMA_MACRO_CS_SPMU_BASE, + [GOYA_SPMU_MME1_SBA] = mmMME1_SBA_SPMU_BASE, + [GOYA_SPMU_MME3_SBB] = mmMME3_SBB_SPMU_BASE, + [GOYA_SPMU_MME4_WACS2] = mmMME4_WACS2_SPMU_BASE, + [GOYA_SPMU_MME4_WACS] = mmMME4_WACS_SPMU_BASE, + [GOYA_SPMU_MMU_CS] = mmMMU_CS_SPMU_BASE, + [GOYA_SPMU_PCIE] = mmPCIE_SPMU_BASE, + [GOYA_SPMU_TPC0_EML] = mmTPC0_EML_SPMU_BASE, + [GOYA_SPMU_TPC1_EML] = mmTPC1_EML_SPMU_BASE, + [GOYA_SPMU_TPC2_EML] = mmTPC2_EML_SPMU_BASE, + [GOYA_SPMU_TPC3_EML] = mmTPC3_EML_SPMU_BASE, + [GOYA_SPMU_TPC4_EML] = mmTPC4_EML_SPMU_BASE, + [GOYA_SPMU_TPC5_EML] = mmTPC5_EML_SPMU_BASE, + [GOYA_SPMU_TPC6_EML] = mmTPC6_EML_SPMU_BASE, + [GOYA_SPMU_TPC7_EML] = mmTPC7_EML_SPMU_BASE +}; + +static int goya_coresight_timeout(struct hl_device *hdev, u64 addr, + int position, bool up) +{ + int rc; + u32 val, timeout_usec; + + if (hdev->pldm) + timeout_usec = GOYA_PLDM_CORESIGHT_TIMEOUT_USEC; + else + timeout_usec = CORESIGHT_TIMEOUT_USEC; + + rc = hl_poll_timeout( + hdev, + addr, + val, + up ? val & BIT(position) : !(val & BIT(position)), + 1000, + timeout_usec); + + if (rc) { + dev_err(hdev->dev, + "Timeout while waiting for coresight, addr: 0x%llx, position: %d, up: %d\n", + addr, position, up); + return -EFAULT; + } + + return 0; +} + +static int goya_config_stm(struct hl_device *hdev, + struct hl_debug_params *params) +{ + struct hl_debug_params_stm *input; + u64 base_reg = debug_stm_regs[params->reg_idx] - CFG_BASE; + int rc; + + WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); + + if (params->enable) { + input = params->input; + + if (!input) + return -EINVAL; + + WREG32(base_reg + 0xE80, 0x80004); + WREG32(base_reg + 0xD64, 7); + WREG32(base_reg + 0xD60, 0); + WREG32(base_reg + 0xD00, lower_32_bits(input->he_mask)); + WREG32(base_reg + 0xD20, lower_32_bits(input->sp_mask)); + WREG32(base_reg + 0xD60, 1); + WREG32(base_reg + 0xD00, upper_32_bits(input->he_mask)); + WREG32(base_reg + 0xD20, upper_32_bits(input->sp_mask)); + WREG32(base_reg + 0xE70, 0x10); + WREG32(base_reg + 0xE60, 0); + WREG32(base_reg + 0xE64, 0x420000); + WREG32(base_reg + 0xE00, 0xFFFFFFFF); + WREG32(base_reg + 0xE20, 0xFFFFFFFF); + WREG32(base_reg + 0xEF4, input->id); + WREG32(base_reg + 0xDF4, 0x80); + WREG32(base_reg + 0xE8C, input->frequency); + WREG32(base_reg + 0xE90, 0x7FF); + WREG32(base_reg + 0xE80, 0x7 | (input->id << 16)); + } else { + WREG32(base_reg + 0xE80, 4); + WREG32(base_reg + 0xD64, 0); + WREG32(base_reg + 0xD60, 1); + WREG32(base_reg + 0xD00, 0); + WREG32(base_reg + 0xD20, 0); + WREG32(base_reg + 0xD60, 0); + WREG32(base_reg + 0xE20, 0); + WREG32(base_reg + 0xE00, 0); + WREG32(base_reg + 0xDF4, 0x80); + WREG32(base_reg + 0xE70, 0); + WREG32(base_reg + 0xE60, 0); + WREG32(base_reg + 0xE64, 0); + WREG32(base_reg + 0xE8C, 0); + + rc = goya_coresight_timeout(hdev, base_reg + 0xE80, 23, false); + if (rc) { + dev_err(hdev->dev, + "Failed to disable STM on timeout, error %d\n", + rc); + return rc; + } + + WREG32(base_reg + 0xE80, 4); + } + + return 0; +} + +static int goya_config_etf(struct hl_device *hdev, + struct hl_debug_params *params) +{ + struct hl_debug_params_etf *input; + u64 base_reg = debug_etf_regs[params->reg_idx] - CFG_BASE; + u32 val; + int rc; + + WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); + + val = RREG32(base_reg + 0x304); + val |= 0x1000; + WREG32(base_reg + 0x304, val); + val |= 0x40; + WREG32(base_reg + 0x304, val); + + rc = goya_coresight_timeout(hdev, base_reg + 0x304, 6, false); + if (rc) { + dev_err(hdev->dev, + "Failed to %s ETF on timeout, error %d\n", + params->enable ? "enable" : "disable", rc); + return rc; + } + + rc = goya_coresight_timeout(hdev, base_reg + 0xC, 2, true); + if (rc) { + dev_err(hdev->dev, + "Failed to %s ETF on timeout, error %d\n", + params->enable ? "enable" : "disable", rc); + return rc; + } + + WREG32(base_reg + 0x20, 0); + + if (params->enable) { + input = params->input; + + if (!input) + return -EINVAL; + + WREG32(base_reg + 0x34, 0x3FFC); + WREG32(base_reg + 0x28, input->sink_mode); + WREG32(base_reg + 0x304, 0x4001); + WREG32(base_reg + 0x308, 0xA); + WREG32(base_reg + 0x20, 1); + } else { + WREG32(base_reg + 0x34, 0); + WREG32(base_reg + 0x28, 0); + WREG32(base_reg + 0x304, 0); + } + + return 0; +} + +static int goya_etr_validate_address(struct hl_device *hdev, u64 addr, + u32 size) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 range_start, range_end; + + if (hdev->mmu_enable) { + range_start = prop->va_space_dram_start_address; + range_end = prop->va_space_dram_end_address; + } else { + range_start = prop->dram_user_base_address; + range_end = prop->dram_end_address; + } + + return hl_mem_area_inside_range(addr, size, range_start, range_end); +} + +static int goya_config_etr(struct hl_device *hdev, + struct hl_debug_params *params) +{ + struct hl_debug_params_etr *input; + u64 base_reg = mmPSOC_ETR_BASE - CFG_BASE; + u32 val; + int rc; + + WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); + + val = RREG32(base_reg + 0x304); + val |= 0x1000; + WREG32(base_reg + 0x304, val); + val |= 0x40; + WREG32(base_reg + 0x304, val); + + rc = goya_coresight_timeout(hdev, base_reg + 0x304, 6, false); + if (rc) { + dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n", + params->enable ? "enable" : "disable", rc); + return rc; + } + + rc = goya_coresight_timeout(hdev, base_reg + 0xC, 2, true); + if (rc) { + dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n", + params->enable ? "enable" : "disable", rc); + return rc; + } + + WREG32(base_reg + 0x20, 0); + + if (params->enable) { + input = params->input; + + if (!input) + return -EINVAL; + + if (input->buffer_size == 0) { + dev_err(hdev->dev, + "ETR buffer size should be bigger than 0\n"); + return -EINVAL; + } + + if (!goya_etr_validate_address(hdev, + input->buffer_address, input->buffer_size)) { + dev_err(hdev->dev, "buffer address is not valid\n"); + return -EINVAL; + } + + WREG32(base_reg + 0x34, 0x3FFC); + WREG32(base_reg + 0x4, input->buffer_size); + WREG32(base_reg + 0x28, input->sink_mode); + WREG32(base_reg + 0x110, 0x700); + WREG32(base_reg + 0x118, + lower_32_bits(input->buffer_address)); + WREG32(base_reg + 0x11C, + upper_32_bits(input->buffer_address)); + WREG32(base_reg + 0x304, 3); + WREG32(base_reg + 0x308, 0xA); + WREG32(base_reg + 0x20, 1); + } else { + WREG32(base_reg + 0x34, 0); + WREG32(base_reg + 0x4, 0x400); + WREG32(base_reg + 0x118, 0); + WREG32(base_reg + 0x11C, 0); + WREG32(base_reg + 0x308, 0); + WREG32(base_reg + 0x28, 0); + WREG32(base_reg + 0x304, 0); + + if (params->output_size >= sizeof(u32)) + *(u32 *) params->output = RREG32(base_reg + 0x18); + } + + return 0; +} + +static int goya_config_funnel(struct hl_device *hdev, + struct hl_debug_params *params) +{ + WREG32(debug_funnel_regs[params->reg_idx] - CFG_BASE + 0xFB0, + CORESIGHT_UNLOCK); + + WREG32(debug_funnel_regs[params->reg_idx] - CFG_BASE, + params->enable ? 0x33F : 0); + + return 0; +} + +static int goya_config_bmon(struct hl_device *hdev, + struct hl_debug_params *params) +{ + struct hl_debug_params_bmon *input; + u64 base_reg = debug_bmon_regs[params->reg_idx] - CFG_BASE; + u32 pcie_base = 0; + + WREG32(base_reg + 0x104, 1); + + if (params->enable) { + input = params->input; + + if (!input) + return -EINVAL; + + WREG32(base_reg + 0x208, lower_32_bits(input->addr_range0)); + WREG32(base_reg + 0x20C, upper_32_bits(input->addr_range0)); + WREG32(base_reg + 0x248, lower_32_bits(input->addr_range1)); + WREG32(base_reg + 0x24C, upper_32_bits(input->addr_range1)); + WREG32(base_reg + 0x224, 0); + WREG32(base_reg + 0x234, 0); + WREG32(base_reg + 0x30C, input->bw_win); + WREG32(base_reg + 0x308, input->win_capture); + + /* PCIE IF BMON bug WA */ + if (params->reg_idx != GOYA_BMON_PCIE_MSTR_RD && + params->reg_idx != GOYA_BMON_PCIE_MSTR_WR && + params->reg_idx != GOYA_BMON_PCIE_SLV_RD && + params->reg_idx != GOYA_BMON_PCIE_SLV_WR) + pcie_base = 0xA000000; + + WREG32(base_reg + 0x700, pcie_base | 0xB00 | (input->id << 12)); + WREG32(base_reg + 0x708, pcie_base | 0xA00 | (input->id << 12)); + WREG32(base_reg + 0x70C, pcie_base | 0xC00 | (input->id << 12)); + + WREG32(base_reg + 0x100, 0x11); + WREG32(base_reg + 0x304, 0x1); + } else { + WREG32(base_reg + 0x208, 0xFFFFFFFF); + WREG32(base_reg + 0x20C, 0xFFFFFFFF); + WREG32(base_reg + 0x248, 0xFFFFFFFF); + WREG32(base_reg + 0x24C, 0xFFFFFFFF); + WREG32(base_reg + 0x224, 0xFFFFFFFF); + WREG32(base_reg + 0x234, 0x1070F); + WREG32(base_reg + 0x30C, 0); + WREG32(base_reg + 0x308, 0xFFFF); + WREG32(base_reg + 0x700, 0xA000B00); + WREG32(base_reg + 0x708, 0xA000A00); + WREG32(base_reg + 0x70C, 0xA000C00); + WREG32(base_reg + 0x100, 1); + WREG32(base_reg + 0x304, 0); + WREG32(base_reg + 0x104, 0); + } + + return 0; +} + +static int goya_config_spmu(struct hl_device *hdev, + struct hl_debug_params *params) +{ + u64 base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE; + struct hl_debug_params_spmu *input = params->input; + u64 *output; + u32 output_arr_len; + u32 events_num; + u32 overflow_idx; + u32 cycle_cnt_idx; + int i; + + if (params->enable) { + input = params->input; + + if (!input) + return -EINVAL; + + if (input->event_types_num < 3) { + dev_err(hdev->dev, + "not enough values for SPMU enable\n"); + return -EINVAL; + } + + WREG32(base_reg + 0xE04, 0x41013046); + WREG32(base_reg + 0xE04, 0x41013040); + + for (i = 0 ; i < input->event_types_num ; i++) + WREG32(base_reg + 0x400 + i * 4, input->event_types[i]); + + WREG32(base_reg + 0xE04, 0x41013041); + WREG32(base_reg + 0xC00, 0x8000003F); + } else { + output = params->output; + output_arr_len = params->output_size / 8; + events_num = output_arr_len - 2; + overflow_idx = output_arr_len - 2; + cycle_cnt_idx = output_arr_len - 1; + + if (!output) + return -EINVAL; + + if (output_arr_len < 3) { + dev_err(hdev->dev, + "not enough values for SPMU disable\n"); + return -EINVAL; + } + + WREG32(base_reg + 0xE04, 0x41013040); + + for (i = 0 ; i < events_num ; i++) + output[i] = RREG32(base_reg + i * 8); + + output[overflow_idx] = RREG32(base_reg + 0xCC0); + + output[cycle_cnt_idx] = RREG32(base_reg + 0xFC); + output[cycle_cnt_idx] <<= 32; + output[cycle_cnt_idx] |= RREG32(base_reg + 0xF8); + + WREG32(base_reg + 0xCC0, 0); + } + + return 0; +} + +static int goya_config_timestamp(struct hl_device *hdev, + struct hl_debug_params *params) +{ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); + if (params->enable) { + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); + } + + return 0; +} int goya_debug_coresight(struct hl_device *hdev, void *data) { - return -ENOTTY; + struct hl_debug_params *params = data; + u32 val; + int rc; + + switch (params->op) { + case HL_DEBUG_OP_STM: + rc = goya_config_stm(hdev, params); + break; + case HL_DEBUG_OP_ETF: + rc = goya_config_etf(hdev, params); + break; + case HL_DEBUG_OP_ETR: + rc = goya_config_etr(hdev, params); + break; + case HL_DEBUG_OP_FUNNEL: + rc = goya_config_funnel(hdev, params); + break; + case HL_DEBUG_OP_BMON: + rc = goya_config_bmon(hdev, params); + break; + case HL_DEBUG_OP_SPMU: + rc = goya_config_spmu(hdev, params); + break; + case HL_DEBUG_OP_TIMESTAMP: + rc = goya_config_timestamp(hdev, params); + break; + + default: + dev_err(hdev->dev, "Unknown coresight id %d\n", params->op); + return -EINVAL; + } + + /* Perform read from the device to flush all configuration */ + val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + + return rc; } diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h index 6cb0b6e54d41..506e71e201e1 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2016-2018 HabanaLabs, Ltd. + * Copyright 2016-2019 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -12,6 +12,7 @@ #include "stlb_regs.h" #include "mmu_regs.h" #include "pcie_aux_regs.h" +#include "pcie_wrap_regs.h" #include "psoc_global_conf_regs.h" #include "psoc_spi_regs.h" #include "psoc_mme_pll_regs.h" diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/pcie_wrap_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/pcie_wrap_regs.h new file mode 100644 index 000000000000..d1e55aace4a0 --- /dev/null +++ b/drivers/misc/habanalabs/include/goya/asic_reg/pcie_wrap_regs.h @@ -0,0 +1,306 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2018 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +/************************************ + ** This is an auto-generated file ** + ** DO NOT EDIT BELOW ** + ************************************/ + +#ifndef ASIC_REG_PCIE_WRAP_REGS_H_ +#define ASIC_REG_PCIE_WRAP_REGS_H_ + +/* + ***************************************** + * PCIE_WRAP (Prototype: PCIE_WRAP) + ***************************************** + */ + +#define mmPCIE_WRAP_PHY_RST_N 0xC01300 + +#define mmPCIE_WRAP_OUTSTAND_TRANS 0xC01400 + +#define mmPCIE_WRAP_MASK_REQ 0xC01404 + +#define mmPCIE_WRAP_IND_AWADDR_L 0xC01500 + +#define mmPCIE_WRAP_IND_AWADDR_H 0xC01504 + +#define mmPCIE_WRAP_IND_AWLEN 0xC01508 + +#define mmPCIE_WRAP_IND_AWSIZE 0xC0150C + +#define mmPCIE_WRAP_IND_AWBURST 0xC01510 + +#define mmPCIE_WRAP_IND_AWLOCK 0xC01514 + +#define mmPCIE_WRAP_IND_AWCACHE 0xC01518 + +#define mmPCIE_WRAP_IND_AWPROT 0xC0151C + +#define mmPCIE_WRAP_IND_AWVALID 0xC01520 + +#define mmPCIE_WRAP_IND_WDATA_0 0xC01524 + +#define mmPCIE_WRAP_IND_WDATA_1 0xC01528 + +#define mmPCIE_WRAP_IND_WDATA_2 0xC0152C + +#define mmPCIE_WRAP_IND_WDATA_3 0xC01530 + +#define mmPCIE_WRAP_IND_WSTRB 0xC01544 + +#define mmPCIE_WRAP_IND_WLAST 0xC01548 + +#define mmPCIE_WRAP_IND_WVALID 0xC0154C + +#define mmPCIE_WRAP_IND_BRESP 0xC01550 + +#define mmPCIE_WRAP_IND_BVALID 0xC01554 + +#define mmPCIE_WRAP_IND_ARADDR_0 0xC01558 + +#define mmPCIE_WRAP_IND_ARADDR_1 0xC0155C + +#define mmPCIE_WRAP_IND_ARLEN 0xC01560 + +#define mmPCIE_WRAP_IND_ARSIZE 0xC01564 + +#define mmPCIE_WRAP_IND_ARBURST 0xC01568 + +#define mmPCIE_WRAP_IND_ARLOCK 0xC0156C + +#define mmPCIE_WRAP_IND_ARCACHE 0xC01570 + +#define mmPCIE_WRAP_IND_ARPROT 0xC01574 + +#define mmPCIE_WRAP_IND_ARVALID 0xC01578 + +#define mmPCIE_WRAP_IND_RDATA_0 0xC0157C + +#define mmPCIE_WRAP_IND_RDATA_1 0xC01580 + +#define mmPCIE_WRAP_IND_RDATA_2 0xC01584 + +#define mmPCIE_WRAP_IND_RDATA_3 0xC01588 + +#define mmPCIE_WRAP_IND_RLAST 0xC0159C + +#define mmPCIE_WRAP_IND_RRESP 0xC015A0 + +#define mmPCIE_WRAP_IND_RVALID 0xC015A4 + +#define mmPCIE_WRAP_IND_AWMISC_INFO 0xC015A8 + +#define mmPCIE_WRAP_IND_AWMISC_INFO_HDR_34DW_0 0xC015AC + +#define mmPCIE_WRAP_IND_AWMISC_INFO_HDR_34DW_1 0xC015B0 + +#define mmPCIE_WRAP_IND_AWMISC_INFO_P_TAG 0xC015B4 + +#define mmPCIE_WRAP_IND_AWMISC_INFO_ATU_BYPAS 0xC015B8 + +#define mmPCIE_WRAP_IND_AWMISC_INFO_FUNC_NUM 0xC015BC + +#define mmPCIE_WRAP_IND_AWMISC_INFO_VFUNC_ACT 0xC015C0 + +#define mmPCIE_WRAP_IND_AWMISC_INFO_VFUNC_NUM 0xC015C4 + +#define mmPCIE_WRAP_IND_AWMISC_INFO_TLPPRFX 0xC015C8 + +#define mmPCIE_WRAP_IND_ARMISC_INFO 0xC015CC + +#define mmPCIE_WRAP_IND_ARMISC_INFO_TLPPRFX 0xC015D0 + +#define mmPCIE_WRAP_IND_ARMISC_INFO_ATU_BYP 0xC015D4 + +#define mmPCIE_WRAP_IND_ARMISC_INFO_FUNC_NUM 0xC015D8 + +#define mmPCIE_WRAP_IND_ARMISC_INFO_VFUNC_ACT 0xC015DC + +#define mmPCIE_WRAP_IND_ARMISC_INFO_VFUNC_NUM 0xC015E0 + +#define mmPCIE_WRAP_SLV_AWMISC_INFO 0xC01800 + +#define mmPCIE_WRAP_SLV_AWMISC_INFO_HDR_34DW_0 0xC01804 + +#define mmPCIE_WRAP_SLV_AWMISC_INFO_HDR_34DW_1 0xC01808 + +#define mmPCIE_WRAP_SLV_AWMISC_INFO_P_TAG 0xC0180C + +#define mmPCIE_WRAP_SLV_AWMISC_INFO_ATU_BYPAS 0xC01810 + +#define mmPCIE_WRAP_SLV_AWMISC_INFO_FUNC_NUM 0xC01814 + +#define mmPCIE_WRAP_SLV_AWMISC_INFO_VFUNC_ACT 0xC01818 + +#define mmPCIE_WRAP_SLV_AWMISC_INFO_VFUNC_NUM 0xC0181C + +#define mmPCIE_WRAP_SLV_AWMISC_INFO_TLPPRFX 0xC01820 + +#define mmPCIE_WRAP_SLV_ARMISC_INFO 0xC01824 + +#define mmPCIE_WRAP_SLV_ARMISC_INFO_TLPPRFX 0xC01828 + +#define mmPCIE_WRAP_SLV_ARMISC_INFO_ATU_BYP 0xC0182C + +#define mmPCIE_WRAP_SLV_ARMISC_INFO_FUNC_NUM 0xC01830 + +#define mmPCIE_WRAP_SLV_ARMISC_INFO_VFUNC_ACT 0xC01834 + +#define mmPCIE_WRAP_SLV_ARMISC_INFO_VFUNC_NUM 0xC01838 + +#define mmPCIE_WRAP_MAX_QID 0xC01900 + +#define mmPCIE_WRAP_DB_BASE_ADDR_L_0 0xC01910 + +#define mmPCIE_WRAP_DB_BASE_ADDR_L_1 0xC01914 + +#define mmPCIE_WRAP_DB_BASE_ADDR_L_2 0xC01918 + +#define mmPCIE_WRAP_DB_BASE_ADDR_L_3 0xC0191C + +#define mmPCIE_WRAP_DB_BASE_ADDR_H_0 0xC01920 + +#define mmPCIE_WRAP_DB_BASE_ADDR_H_1 0xC01924 + +#define mmPCIE_WRAP_DB_BASE_ADDR_H_2 0xC01928 + +#define mmPCIE_WRAP_DB_BASE_ADDR_H_3 0xC0192C + +#define mmPCIE_WRAP_DB_MASK 0xC01940 + +#define mmPCIE_WRAP_SQ_BASE_ADDR_H 0xC01A00 + +#define mmPCIE_WRAP_SQ_BASE_ADDR_L 0xC01A04 + +#define mmPCIE_WRAP_SQ_STRIDE_ACCRESS 0xC01A08 + +#define mmPCIE_WRAP_SQ_POP_CMD 0xC01A10 + +#define mmPCIE_WRAP_SQ_POP_DATA 0xC01A14 + +#define mmPCIE_WRAP_DB_INTR_0 0xC01A20 + +#define mmPCIE_WRAP_DB_INTR_1 0xC01A24 + +#define mmPCIE_WRAP_DB_INTR_2 0xC01A28 + +#define mmPCIE_WRAP_DB_INTR_3 0xC01A2C + +#define mmPCIE_WRAP_DB_INTR_4 0xC01A30 + +#define mmPCIE_WRAP_DB_INTR_5 0xC01A34 + +#define mmPCIE_WRAP_DB_INTR_6 0xC01A38 + +#define mmPCIE_WRAP_DB_INTR_7 0xC01A3C + +#define mmPCIE_WRAP_MMU_BYPASS_DMA 0xC01A80 + +#define mmPCIE_WRAP_MMU_BYPASS_NON_DMA 0xC01A84 + +#define mmPCIE_WRAP_ASID_NON_DMA 0xC01A90 + +#define mmPCIE_WRAP_ASID_DMA_0 0xC01AA0 + +#define mmPCIE_WRAP_ASID_DMA_1 0xC01AA4 + +#define mmPCIE_WRAP_ASID_DMA_2 0xC01AA8 + +#define mmPCIE_WRAP_ASID_DMA_3 0xC01AAC + +#define mmPCIE_WRAP_ASID_DMA_4 0xC01AB0 + +#define mmPCIE_WRAP_ASID_DMA_5 0xC01AB4 + +#define mmPCIE_WRAP_ASID_DMA_6 0xC01AB8 + +#define mmPCIE_WRAP_ASID_DMA_7 0xC01ABC + +#define mmPCIE_WRAP_CPU_HOT_RST 0xC01AE0 + +#define mmPCIE_WRAP_AXI_PROT_OVR 0xC01AE4 + +#define mmPCIE_WRAP_CACHE_OVR 0xC01B00 + +#define mmPCIE_WRAP_LOCK_OVR 0xC01B04 + +#define mmPCIE_WRAP_PROT_OVR 0xC01B08 + +#define mmPCIE_WRAP_ARUSER_OVR 0xC01B0C + +#define mmPCIE_WRAP_AWUSER_OVR 0xC01B10 + +#define mmPCIE_WRAP_ARUSER_OVR_EN 0xC01B14 + +#define mmPCIE_WRAP_AWUSER_OVR_EN 0xC01B18 + +#define mmPCIE_WRAP_MAX_OUTSTAND 0xC01B20 + +#define mmPCIE_WRAP_MST_IN 0xC01B24 + +#define mmPCIE_WRAP_RSP_OK 0xC01B28 + +#define mmPCIE_WRAP_LBW_CACHE_OVR 0xC01B40 + +#define mmPCIE_WRAP_LBW_LOCK_OVR 0xC01B44 + +#define mmPCIE_WRAP_LBW_PROT_OVR 0xC01B48 + +#define mmPCIE_WRAP_LBW_ARUSER_OVR 0xC01B4C + +#define mmPCIE_WRAP_LBW_AWUSER_OVR 0xC01B50 + +#define mmPCIE_WRAP_LBW_ARUSER_OVR_EN 0xC01B58 + +#define mmPCIE_WRAP_LBW_AWUSER_OVR_EN 0xC01B5C + +#define mmPCIE_WRAP_LBW_MAX_OUTSTAND 0xC01B60 + +#define mmPCIE_WRAP_LBW_MST_IN 0xC01B64 + +#define mmPCIE_WRAP_LBW_RSP_OK 0xC01B68 + +#define mmPCIE_WRAP_QUEUE_INIT 0xC01C00 + +#define mmPCIE_WRAP_AXI_SPLIT_INTR_0 0xC01C10 + +#define mmPCIE_WRAP_AXI_SPLIT_INTR_1 0xC01C14 + +#define mmPCIE_WRAP_DB_AWUSER 0xC01D00 + +#define mmPCIE_WRAP_DB_ARUSER 0xC01D04 + +#define mmPCIE_WRAP_PCIE_AWUSER 0xC01D08 + +#define mmPCIE_WRAP_PCIE_ARUSER 0xC01D0C + +#define mmPCIE_WRAP_PSOC_AWUSER 0xC01D10 + +#define mmPCIE_WRAP_PSOC_ARUSER 0xC01D14 + +#define mmPCIE_WRAP_SCH_Q_AWUSER 0xC01D18 + +#define mmPCIE_WRAP_SCH_Q_ARUSER 0xC01D1C + +#define mmPCIE_WRAP_PSOC2PCI_AWUSER 0xC01D40 + +#define mmPCIE_WRAP_PSOC2PCI_ARUSER 0xC01D44 + +#define mmPCIE_WRAP_DRAIN_TIMEOUT 0xC01D50 + +#define mmPCIE_WRAP_DRAIN_CFG 0xC01D54 + +#define mmPCIE_WRAP_DB_AXI_ERR 0xC01DE0 + +#define mmPCIE_WRAP_SPMU_INTR 0xC01DE4 + +#define mmPCIE_WRAP_AXI_INTR 0xC01DE8 + +#define mmPCIE_WRAP_E2E_CTRL 0xC01DF0 + +#endif /* ASIC_REG_PCIE_WRAP_REGS_H_ */ diff --git a/drivers/misc/habanalabs/include/goya/goya_coresight.h b/drivers/misc/habanalabs/include/goya/goya_coresight.h new file mode 100644 index 000000000000..6e933c0ca5cd --- /dev/null +++ b/drivers/misc/habanalabs/include/goya/goya_coresight.h @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2018 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef GOYA_CORESIGHT_H +#define GOYA_CORESIGHT_H + +enum goya_debug_stm_regs_index { + GOYA_STM_FIRST = 0, + GOYA_STM_CPU = GOYA_STM_FIRST, + GOYA_STM_DMA_CH_0_CS, + GOYA_STM_DMA_CH_1_CS, + GOYA_STM_DMA_CH_2_CS, + GOYA_STM_DMA_CH_3_CS, + GOYA_STM_DMA_CH_4_CS, + GOYA_STM_DMA_MACRO_CS, + GOYA_STM_MME1_SBA, + GOYA_STM_MME3_SBB, + GOYA_STM_MME4_WACS2, + GOYA_STM_MME4_WACS, + GOYA_STM_MMU_CS, + GOYA_STM_PCIE, + GOYA_STM_PSOC, + GOYA_STM_TPC0_EML, + GOYA_STM_TPC1_EML, + GOYA_STM_TPC2_EML, + GOYA_STM_TPC3_EML, + GOYA_STM_TPC4_EML, + GOYA_STM_TPC5_EML, + GOYA_STM_TPC6_EML, + GOYA_STM_TPC7_EML, + GOYA_STM_LAST = GOYA_STM_TPC7_EML +}; + +enum goya_debug_etf_regs_index { + GOYA_ETF_FIRST = 0, + GOYA_ETF_CPU_0 = GOYA_ETF_FIRST, + GOYA_ETF_CPU_1, + GOYA_ETF_CPU_TRACE, + GOYA_ETF_DMA_CH_0_CS, + GOYA_ETF_DMA_CH_1_CS, + GOYA_ETF_DMA_CH_2_CS, + GOYA_ETF_DMA_CH_3_CS, + GOYA_ETF_DMA_CH_4_CS, + GOYA_ETF_DMA_MACRO_CS, + GOYA_ETF_MME1_SBA, + GOYA_ETF_MME3_SBB, + GOYA_ETF_MME4_WACS2, + GOYA_ETF_MME4_WACS, + GOYA_ETF_MMU_CS, + GOYA_ETF_PCIE, + GOYA_ETF_PSOC, + GOYA_ETF_TPC0_EML, + GOYA_ETF_TPC1_EML, + GOYA_ETF_TPC2_EML, + GOYA_ETF_TPC3_EML, + GOYA_ETF_TPC4_EML, + GOYA_ETF_TPC5_EML, + GOYA_ETF_TPC6_EML, + GOYA_ETF_TPC7_EML, + GOYA_ETF_LAST = GOYA_ETF_TPC7_EML +}; + +enum goya_debug_funnel_regs_index { + GOYA_FUNNEL_FIRST = 0, + GOYA_FUNNEL_CPU = GOYA_FUNNEL_FIRST, + GOYA_FUNNEL_DMA_CH_6_1, + GOYA_FUNNEL_DMA_MACRO_3_1, + GOYA_FUNNEL_MME0_RTR, + GOYA_FUNNEL_MME1_RTR, + GOYA_FUNNEL_MME2_RTR, + GOYA_FUNNEL_MME3_RTR, + GOYA_FUNNEL_MME4_RTR, + GOYA_FUNNEL_MME5_RTR, + GOYA_FUNNEL_PCIE, + GOYA_FUNNEL_PSOC, + GOYA_FUNNEL_TPC0_EML, + GOYA_FUNNEL_TPC1_EML, + GOYA_FUNNEL_TPC1_RTR, + GOYA_FUNNEL_TPC2_EML, + GOYA_FUNNEL_TPC2_RTR, + GOYA_FUNNEL_TPC3_EML, + GOYA_FUNNEL_TPC3_RTR, + GOYA_FUNNEL_TPC4_EML, + GOYA_FUNNEL_TPC4_RTR, + GOYA_FUNNEL_TPC5_EML, + GOYA_FUNNEL_TPC5_RTR, + GOYA_FUNNEL_TPC6_EML, + GOYA_FUNNEL_TPC6_RTR, + GOYA_FUNNEL_TPC7_EML, + GOYA_FUNNEL_LAST = GOYA_FUNNEL_TPC7_EML +}; + +enum goya_debug_bmon_regs_index { + GOYA_BMON_FIRST = 0, + GOYA_BMON_CPU_RD = GOYA_BMON_FIRST, + GOYA_BMON_CPU_WR, + GOYA_BMON_DMA_CH_0_0, + GOYA_BMON_DMA_CH_0_1, + GOYA_BMON_DMA_CH_1_0, + GOYA_BMON_DMA_CH_1_1, + GOYA_BMON_DMA_CH_2_0, + GOYA_BMON_DMA_CH_2_1, + GOYA_BMON_DMA_CH_3_0, + GOYA_BMON_DMA_CH_3_1, + GOYA_BMON_DMA_CH_4_0, + GOYA_BMON_DMA_CH_4_1, + GOYA_BMON_DMA_MACRO_0, + GOYA_BMON_DMA_MACRO_1, + GOYA_BMON_DMA_MACRO_2, + GOYA_BMON_DMA_MACRO_3, + GOYA_BMON_DMA_MACRO_4, + GOYA_BMON_DMA_MACRO_5, + GOYA_BMON_DMA_MACRO_6, + GOYA_BMON_DMA_MACRO_7, + GOYA_BMON_MME1_SBA_0, + GOYA_BMON_MME1_SBA_1, + GOYA_BMON_MME3_SBB_0, + GOYA_BMON_MME3_SBB_1, + GOYA_BMON_MME4_WACS2_0, + GOYA_BMON_MME4_WACS2_1, + GOYA_BMON_MME4_WACS2_2, + GOYA_BMON_MME4_WACS_0, + GOYA_BMON_MME4_WACS_1, + GOYA_BMON_MME4_WACS_2, + GOYA_BMON_MME4_WACS_3, + GOYA_BMON_MME4_WACS_4, + GOYA_BMON_MME4_WACS_5, + GOYA_BMON_MME4_WACS_6, + GOYA_BMON_MMU_0, + GOYA_BMON_MMU_1, + GOYA_BMON_PCIE_MSTR_RD, + GOYA_BMON_PCIE_MSTR_WR, + GOYA_BMON_PCIE_SLV_RD, + GOYA_BMON_PCIE_SLV_WR, + GOYA_BMON_TPC0_EML_0, + GOYA_BMON_TPC0_EML_1, + GOYA_BMON_TPC0_EML_2, + GOYA_BMON_TPC0_EML_3, + GOYA_BMON_TPC1_EML_0, + GOYA_BMON_TPC1_EML_1, + GOYA_BMON_TPC1_EML_2, + GOYA_BMON_TPC1_EML_3, + GOYA_BMON_TPC2_EML_0, + GOYA_BMON_TPC2_EML_1, + GOYA_BMON_TPC2_EML_2, + GOYA_BMON_TPC2_EML_3, + GOYA_BMON_TPC3_EML_0, + GOYA_BMON_TPC3_EML_1, + GOYA_BMON_TPC3_EML_2, + GOYA_BMON_TPC3_EML_3, + GOYA_BMON_TPC4_EML_0, + GOYA_BMON_TPC4_EML_1, + GOYA_BMON_TPC4_EML_2, + GOYA_BMON_TPC4_EML_3, + GOYA_BMON_TPC5_EML_0, + GOYA_BMON_TPC5_EML_1, + GOYA_BMON_TPC5_EML_2, + GOYA_BMON_TPC5_EML_3, + GOYA_BMON_TPC6_EML_0, + GOYA_BMON_TPC6_EML_1, + GOYA_BMON_TPC6_EML_2, + GOYA_BMON_TPC6_EML_3, + GOYA_BMON_TPC7_EML_0, + GOYA_BMON_TPC7_EML_1, + GOYA_BMON_TPC7_EML_2, + GOYA_BMON_TPC7_EML_3, + GOYA_BMON_LAST = GOYA_BMON_TPC7_EML_3 +}; + +enum goya_debug_spmu_regs_index { + GOYA_SPMU_FIRST = 0, + GOYA_SPMU_DMA_CH_0_CS = GOYA_SPMU_FIRST, + GOYA_SPMU_DMA_CH_1_CS, + GOYA_SPMU_DMA_CH_2_CS, + GOYA_SPMU_DMA_CH_3_CS, + GOYA_SPMU_DMA_CH_4_CS, + GOYA_SPMU_DMA_MACRO_CS, + GOYA_SPMU_MME1_SBA, + GOYA_SPMU_MME3_SBB, + GOYA_SPMU_MME4_WACS2, + GOYA_SPMU_MME4_WACS, + GOYA_SPMU_MMU_CS, + GOYA_SPMU_PCIE, + GOYA_SPMU_TPC0_EML, + GOYA_SPMU_TPC1_EML, + GOYA_SPMU_TPC2_EML, + GOYA_SPMU_TPC3_EML, + GOYA_SPMU_TPC4_EML, + GOYA_SPMU_TPC5_EML, + GOYA_SPMU_TPC6_EML, + GOYA_SPMU_TPC7_EML, + GOYA_SPMU_LAST = GOYA_SPMU_TPC7_EML +}; + +#endif /* GOYA_CORESIGHT_H */ From 90027296adad1e9935a893f1e5973959274fa0d1 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 3 Apr 2019 09:51:04 +0300 Subject: [PATCH 25/31] uapi/habanalabs: fix some comments in uapi file This patch adds a better explanation about the sequence number that is returned per CS. It also fixes the comment about queue numbering rules. Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 7704fe08c3ad..613d431da783 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -20,8 +20,8 @@ /* * Queue Numbering * - * The external queues (DMA channels + CPU) MUST be before the internal queues - * and each group (DMA channels + CPU and internal) must be contiguous inside + * The external queues (PCI DMA channels) MUST be before the internal queues + * and each group (PCI DMA channels and internal) must be contiguous inside * itself but there can be a gap between the two groups (although not * recommended) */ @@ -477,6 +477,12 @@ struct hl_debug_args { * Each JOB will be enqueued on a specific queue, according to the user's input. * There can be more then one JOB per queue. * + * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase, + * a second set is for "execution" phase and a third set is for "store" phase. + * The JOBS on the "restore" phase are enqueued only after context-switch + * (or if its the first CS for this context). The user can also order the + * driver to run the "restore" phase explicitly + * * There are two types of queues - external and internal. External queues * are DMA queues which transfer data from/to the Host. All other queues are * internal. The driver will get completion notifications from the device only @@ -493,19 +499,18 @@ struct hl_debug_args { * relevant queues. Therefore, the user mustn't assume the CS has been completed * or has even started to execute. * - * Upon successful enqueue, the IOCTL returns an opaque handle which the user + * Upon successful enqueue, the IOCTL returns a sequence number which the user * can use with the "Wait for CS" IOCTL to check whether the handle's CS * external JOBS have been completed. Note that if the CS has internal JOBS * which can execute AFTER the external JOBS have finished, the driver might * report that the CS has finished executing BEFORE the internal JOBS have * actually finish executing. * - * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase, - * a second set is for "execution" phase and a third set is for "store" phase. - * The JOBS on the "restore" phase are enqueued only after context-switch - * (or if its the first CS for this context). The user can also order the - * driver to run the "restore" phase explicitly - * + * Even though the sequence number increments per CS, the user can NOT + * automatically assume that if CS with sequence number N finished, then CS + * with sequence number N-1 also finished. The user can make this assumption if + * and only if CS N and CS N-1 are exactly the same (same CBs for the same + * queues). */ #define HL_IOCTL_CS \ _IOWR('H', 0x03, union hl_cs_args) From bedd14425d86a8fac6972055dcd7591de21c10be Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 2 Apr 2019 15:56:16 +0300 Subject: [PATCH 26/31] habanalabs: refactoring in goya.c This patch does some refactoring in goya.c to make code more reusable between goya code and the goya simulator code (which is not upstreamed). In addition, the patch removes some dead functions from goya.c which are not used by the current upstream code Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 146 +++++++++++++-------------- drivers/misc/habanalabs/goya/goyaP.h | 19 +++- drivers/misc/habanalabs/habanalabs.h | 4 - 3 files changed, 88 insertions(+), 81 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index c6b9d4aabef6..aaa05662ca9f 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -78,7 +78,6 @@ #define GOYA_RESET_WAIT_MSEC 1 /* 1ms */ #define GOYA_CPU_RESET_WAIT_MSEC 100 /* 100ms */ #define GOYA_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ -#define GOYA_CPU_TIMEOUT_USEC 10000000 /* 10s */ #define GOYA_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ #define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) @@ -298,12 +297,6 @@ static u32 goya_all_events[] = { GOYA_ASYNC_EVENT_ID_DMA_BM_CH4 }; -static void goya_mmu_prepare(struct hl_device *hdev, u32 asid); -static int goya_mmu_clear_pgt_range(struct hl_device *hdev); -static int goya_mmu_set_dram_default_page(struct hl_device *hdev); -static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, - u64 phys_addr); - static void goya_get_fixed_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -511,6 +504,28 @@ static int goya_early_fini(struct hl_device *hdev) return 0; } +static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) +{ + /* mask to zero the MMBP and ASID bits */ + WREG32_AND(reg, ~0x7FF); + WREG32_OR(reg, asid); +} + +static void goya_qman0_set_security(struct hl_device *hdev, bool secure) +{ + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_MMU)) + return; + + if (secure) + WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED); + else + WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED); + + RREG32(mmDMA_QM_0_GLBL_PROT); +} + /* * goya_fetch_psoc_frequency - Fetch PSOC frequency values * @@ -633,6 +648,9 @@ static int goya_sw_init(struct hl_device *hdev) goya->tpc_clk = GOYA_PLL_FREQ_LOW; goya->ic_clk = GOYA_PLL_FREQ_LOW; + goya->mmu_prepare_reg = goya_mmu_prepare_reg; + goya->qman0_set_security = goya_qman0_set_security; + hdev->asic_specific = goya; /* Create DMA pool for small allocations */ @@ -2324,6 +2342,38 @@ out: return 0; } +static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, + u64 phys_addr) +{ + u32 status, timeout_usec; + int rc; + + if (hdev->pldm) + timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC; + else + timeout_usec = MMU_CONFIG_TIMEOUT_USEC; + + WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); + WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); + WREG32(MMU_ASID_BUSY, 0x80000000 | asid); + + rc = hl_poll_timeout( + hdev, + MMU_ASID_BUSY, + status, + !(status & 0x80000000), + 1000, + timeout_usec); + + if (rc) { + dev_err(hdev->dev, + "Timeout during MMU hop0 config of asid %d\n", asid); + return rc; + } + + return 0; +} + static int goya_mmu_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -2798,10 +2848,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) *fence_ptr = 0; - if (goya->hw_cap_initialized & HW_CAP_MMU) { - WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED); - RREG32(mmDMA_QM_0_GLBL_PROT); - } + goya->qman0_set_security(hdev, true); /* * goya cs parser saves space for 2xpacket_msg_prot at end of CB. For @@ -2843,10 +2890,7 @@ free_fence_ptr: hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); - if (goya->hw_cap_initialized & HW_CAP_MMU) { - WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED); - RREG32(mmDMA_QM_0_GLBL_PROT); - } + goya->qman0_set_security(hdev, false); return rc; } @@ -2953,7 +2997,7 @@ int goya_test_cpu_queue(struct hl_device *hdev) return hl_fw_test_cpu_queue(hdev); } -static int goya_test_queues(struct hl_device *hdev) +int goya_test_queues(struct hl_device *hdev) { int i, rc, ret_val = 0; @@ -2987,14 +3031,14 @@ static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_pool_free(hdev->dma_pool, vaddr, dma_addr); } -static void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, - size_t size, dma_addr_t *dma_handle) +void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle) { return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); } -static void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, - size_t size, void *vaddr) +void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, + void *vaddr) { hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); } @@ -4211,8 +4255,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << ARMCP_PKT_CTL_OPCODE_SHIFT); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, - total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result); + rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, + HL_DEVICE_TIMEOUT_USEC, &result); if (rc) dev_err(hdev->dev, "failed to unmask IRQ array\n"); @@ -4244,7 +4288,7 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(event_type); - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, &result); if (rc) @@ -4466,7 +4510,7 @@ static int goya_context_switch(struct hl_device *hdev, u32 asid) return 0; } -static int goya_mmu_clear_pgt_range(struct hl_device *hdev) +int goya_mmu_clear_pgt_range(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct goya_device *goya = hdev->asic_specific; @@ -4480,7 +4524,7 @@ static int goya_mmu_clear_pgt_range(struct hl_device *hdev) return goya_memset_device_memory(hdev, addr, size, 0, true); } -static int goya_mmu_set_dram_default_page(struct hl_device *hdev) +int goya_mmu_set_dram_default_page(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; u64 addr = hdev->asic_prop.mmu_dram_default_page_addr; @@ -4493,7 +4537,7 @@ static int goya_mmu_set_dram_default_page(struct hl_device *hdev) return goya_memset_device_memory(hdev, addr, size, val, true); } -static void goya_mmu_prepare(struct hl_device *hdev, u32 asid) +void goya_mmu_prepare(struct hl_device *hdev, u32 asid) { struct goya_device *goya = hdev->asic_specific; int i; @@ -4507,10 +4551,8 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid) } /* zero the MMBP and ASID bits and then set the ASID */ - for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++) { - WREG32_AND(goya_mmu_regs[i], ~0x7FF); - WREG32_OR(goya_mmu_regs[i], asid); - } + for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++) + goya->mmu_prepare_reg(hdev, goya_mmu_regs[i], asid); } static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard) @@ -4601,38 +4643,6 @@ static void goya_mmu_invalidate_cache_range(struct hl_device *hdev, "Timeout when waiting for MMU cache invalidation\n"); } -static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, - u64 phys_addr) -{ - u32 status, timeout_usec; - int rc; - - if (hdev->pldm) - timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC; - else - timeout_usec = MMU_CONFIG_TIMEOUT_USEC; - - WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); - WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); - WREG32(MMU_ASID_BUSY, 0x80000000 | asid); - - rc = hl_poll_timeout( - hdev, - MMU_ASID_BUSY, - status, - !(status & 0x80000000), - 1000, - timeout_usec); - - if (rc) { - dev_err(hdev->dev, - "Timeout during MMU hop0 config of asid %d\n", asid); - return rc; - } - - return 0; -} - int goya_send_heartbeat(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; @@ -4674,16 +4684,6 @@ int goya_armcp_info_get(struct hl_device *hdev) return 0; } -static void goya_init_clock_gating(struct hl_device *hdev) -{ - -} - -static void goya_disable_clock_gating(struct hl_device *hdev) -{ - -} - static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size) { u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg; @@ -4814,8 +4814,6 @@ static const struct hl_asic_funcs goya_funcs = { .mmu_invalidate_cache = goya_mmu_invalidate_cache, .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range, .send_heartbeat = goya_send_heartbeat, - .enable_clock_gating = goya_init_clock_gating, - .disable_clock_gating = goya_disable_clock_gating, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, .soft_reset_late_init = goya_soft_reset_late_init, diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 2a6cdca744dd..b572e0263ac5 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -39,11 +39,13 @@ #error "Number of MSIX interrupts must be smaller or equal to GOYA_MSIX_ENTRIES" #endif -#define QMAN_FENCE_TIMEOUT_USEC 10000 /* 10 ms */ +#define QMAN_FENCE_TIMEOUT_USEC 10000 /* 10 ms */ -#define QMAN_STOP_TIMEOUT_USEC 100000 /* 100 ms */ +#define QMAN_STOP_TIMEOUT_USEC 100000 /* 100 ms */ -#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */ +#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */ + +#define GOYA_CPU_TIMEOUT_USEC 10000000 /* 10s */ #define TPC_ENABLED_MASK 0xFF @@ -145,6 +147,9 @@ enum goya_fw_component { }; struct goya_device { + void (*mmu_prepare_reg)(struct hl_device *hdev, u64 reg, u32 asid); + void (*qman0_set_security)(struct hl_device *hdev, bool secure); + /* TODO: remove hw_queues_lock after moving to scheduler code */ spinlock_t hw_queues_lock; @@ -180,6 +185,10 @@ void goya_init_security(struct hl_device *hdev); int goya_debug_coresight(struct hl_device *hdev, void *data); u64 goya_get_max_power(struct hl_device *hdev); void goya_set_max_power(struct hl_device *hdev, u64 value); +int goya_test_queues(struct hl_device *hdev); +void goya_mmu_prepare(struct hl_device *hdev, u32 asid); +int goya_mmu_clear_pgt_range(struct hl_device *hdev); +int goya_mmu_set_dram_default_page(struct hl_device *hdev); void goya_late_fini(struct hl_device *hdev); int goya_suspend(struct hl_device *hdev); @@ -195,5 +204,9 @@ void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id, u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt); int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id); int goya_send_heartbeat(struct hl_device *hdev); +void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle); +void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, + void *vaddr); #endif /* GOYAP_H_ */ diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 18a03657780a..535d4f9531f6 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -481,8 +481,6 @@ enum hl_pll_frequency { * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with * ASID-VA-size mask. * @send_heartbeat: send is-alive packet to ArmCP and verify response. - * @enable_clock_gating: enable clock gating for reducing power consumption. - * @disable_clock_gating: disable clock for accessing registers on HBW. * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. * @soft_reset_late_init: perform certain actions needed after soft reset. @@ -556,8 +554,6 @@ struct hl_asic_funcs { void (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); - void (*enable_clock_gating)(struct hl_device *hdev); - void (*disable_clock_gating)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, void *data); bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size); int (*soft_reset_late_init)(struct hl_device *hdev); From 295938406cbcb541de8893d0280a2265c41e506d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 4 Apr 2019 14:33:34 +0300 Subject: [PATCH 27/31] habanalabs: ASIC_AUTO_DETECT enum value is redundant This patch removes the enum value of ASIC_AUTO_DETECT because we can use the validity of the pdev variable to know whether we have a real device or a simulator. For a real device, we detect the asic type from the device ID while for a simulator, the simulator code calls create_hdev() with the specified ASIC type. Set ASIC_INVALID as the first option in the enum to make sure that no other enum value will receive the value 0 (which indicates a non-existing entry in the simulator array). Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/habanalabs.h | 8 +++----- drivers/misc/habanalabs/habanalabs_drv.c | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 535d4f9531f6..2f02bb55f66a 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -390,14 +390,12 @@ struct hl_eq { /** * enum hl_asic_type - supported ASIC types. - * @ASIC_AUTO_DETECT: ASIC type will be automatically set. - * @ASIC_GOYA: Goya device. * @ASIC_INVALID: Invalid ASIC type. + * @ASIC_GOYA: Goya device. */ enum hl_asic_type { - ASIC_AUTO_DETECT, - ASIC_GOYA, - ASIC_INVALID + ASIC_INVALID, + ASIC_GOYA }; struct hl_cs_parser; diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c index b697339d3904..1667df7ca64c 100644 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ b/drivers/misc/habanalabs/habanalabs_drv.c @@ -218,7 +218,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->disabled = true; hdev->pdev = pdev; /* can be NULL in case of simulator device */ - if (asic_type == ASIC_AUTO_DETECT) { + if (pdev) { hdev->asic_type = get_asic_type(pdev->device); if (hdev->asic_type == ASIC_INVALID) { dev_err(&pdev->dev, "Unsupported ASIC\n"); @@ -337,7 +337,7 @@ static int hl_pci_probe(struct pci_dev *pdev, " device found [%04x:%04x] (rev %x)\n", (int)pdev->vendor, (int)pdev->device, (int)pdev->revision); - rc = create_hdev(&hdev, pdev, ASIC_AUTO_DETECT, -1); + rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1); if (rc) return rc; From 54303a1aef95b0cbd6a04c3b729c93da7a58e0f7 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 4 Apr 2019 14:42:26 +0300 Subject: [PATCH 28/31] habanalabs: split mmu/no-mmu code paths in memory ioctl To make the memory ioctl code more readable, this patch moves the legacy/debug code path of mmu-disabled to a separate function, which is called (if necessary) from the main memory ioctl function. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/memory.c | 187 ++++++++++++++++--------------- 1 file changed, 97 insertions(+), 90 deletions(-) diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index e9e163545584..05919f913300 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -1090,6 +1090,64 @@ vm_type_err: return rc; } +static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_ctx *ctx = hpriv->ctx; + u64 device_addr = 0; + u32 handle = 0; + int rc; + + switch (args->in.op) { + case HL_MEM_OP_ALLOC: + if (args->in.alloc.mem_size == 0) { + dev_err(hdev->dev, + "alloc size must be larger than 0\n"); + rc = -EINVAL; + goto out; + } + + /* Force contiguous as there are no real MMU + * translations to overcome physical memory gaps + */ + args->in.flags |= HL_MEM_CONTIGUOUS; + rc = alloc_device_memory(ctx, &args->in, &handle); + + memset(args, 0, sizeof(*args)); + args->out.handle = (__u64) handle; + break; + + case HL_MEM_OP_FREE: + rc = free_device_memory(ctx, args->in.free.handle); + break; + + case HL_MEM_OP_MAP: + if (args->in.flags & HL_MEM_USERPTR) { + device_addr = args->in.map_host.host_virt_addr; + rc = 0; + } else { + rc = get_paddr_from_handle(ctx, &args->in, + &device_addr); + } + + memset(args, 0, sizeof(*args)); + args->out.device_virt_addr = device_addr; + break; + + case HL_MEM_OP_UNMAP: + rc = 0; + break; + + default: + dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); + rc = -ENOTTY; + break; + } + +out: + return rc; +} + int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) { union hl_mem_args *args = data; @@ -1105,100 +1163,49 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) return -EBUSY; } - if (hdev->mmu_enable) { - switch (args->in.op) { - case HL_MEM_OP_ALLOC: - if (!hdev->dram_supports_virtual_memory) { - dev_err(hdev->dev, - "DRAM alloc is not supported\n"); - rc = -EINVAL; - goto out; - } - if (args->in.alloc.mem_size == 0) { - dev_err(hdev->dev, - "alloc size must be larger than 0\n"); - rc = -EINVAL; - goto out; - } - rc = alloc_device_memory(ctx, &args->in, &handle); + if (!hdev->mmu_enable) + return mem_ioctl_no_mmu(hpriv, args); - memset(args, 0, sizeof(*args)); - args->out.handle = (__u64) handle; - break; - - case HL_MEM_OP_FREE: - if (!hdev->dram_supports_virtual_memory) { - dev_err(hdev->dev, - "DRAM free is not supported\n"); - rc = -EINVAL; - goto out; - } - rc = free_device_memory(ctx, args->in.free.handle); - break; - - case HL_MEM_OP_MAP: - rc = map_device_va(ctx, &args->in, &device_addr); - - memset(args, 0, sizeof(*args)); - args->out.device_virt_addr = device_addr; - break; - - case HL_MEM_OP_UNMAP: - rc = unmap_device_va(ctx, - args->in.unmap.device_virt_addr); - break; - - default: - dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); - rc = -ENOTTY; - break; + switch (args->in.op) { + case HL_MEM_OP_ALLOC: + if (!hdev->dram_supports_virtual_memory) { + dev_err(hdev->dev, "DRAM alloc is not supported\n"); + rc = -EINVAL; + goto out; } - } else { - switch (args->in.op) { - case HL_MEM_OP_ALLOC: - if (args->in.alloc.mem_size == 0) { - dev_err(hdev->dev, - "alloc size must be larger than 0\n"); - rc = -EINVAL; - goto out; - } - /* Force contiguous as there are no real MMU - * translations to overcome physical memory gaps - */ - args->in.flags |= HL_MEM_CONTIGUOUS; - rc = alloc_device_memory(ctx, &args->in, &handle); - - memset(args, 0, sizeof(*args)); - args->out.handle = (__u64) handle; - break; - - case HL_MEM_OP_FREE: - rc = free_device_memory(ctx, args->in.free.handle); - break; - - case HL_MEM_OP_MAP: - if (args->in.flags & HL_MEM_USERPTR) { - device_addr = args->in.map_host.host_virt_addr; - rc = 0; - } else { - rc = get_paddr_from_handle(ctx, &args->in, - &device_addr); - } - - memset(args, 0, sizeof(*args)); - args->out.device_virt_addr = device_addr; - break; - - case HL_MEM_OP_UNMAP: - rc = 0; - break; - - default: - dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); - rc = -ENOTTY; - break; + if (args->in.alloc.mem_size == 0) { + dev_err(hdev->dev, + "alloc size must be larger than 0\n"); + rc = -EINVAL; + goto out; } + rc = alloc_device_memory(ctx, &args->in, &handle); + + memset(args, 0, sizeof(*args)); + args->out.handle = (__u64) handle; + break; + + case HL_MEM_OP_FREE: + rc = free_device_memory(ctx, args->in.free.handle); + break; + + case HL_MEM_OP_MAP: + rc = map_device_va(ctx, &args->in, &device_addr); + + memset(args, 0, sizeof(*args)); + args->out.device_virt_addr = device_addr; + break; + + case HL_MEM_OP_UNMAP: + rc = unmap_device_va(ctx, + args->in.unmap.device_virt_addr); + break; + + default: + dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); + rc = -ENOTTY; + break; } out: From caa3c8e52582fc4d2ed82afd5e7ea164c18ef4fe Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 6 Apr 2019 13:23:54 +0300 Subject: [PATCH 29/31] habanalabs: all FD must be closed before removing device This patch fixes a bug in the implementation of the function that removes the device. The bug can happen when the device is removed but not the driver itself (e.g. remove by the OS due to PCI freeze in Power architecture). In that case, there maybe open users that are calling IOCTLs while the device is removed. This is a possible race condition that the driver must handle. Otherwise, a kernel panic may occur. This race is prevented in the hard-reset flow, because the driver makes sure the users are closed before continuing with the hard-reset. This race can not occur when the driver itself is removed because the OS makes sure all the file descriptors are closed. The fix is to make sure the open users close their file descriptors and if they don't (after a certain amount of time), the driver sends them a SIGKILL, because the remove of the device can't be stopped. The patch re-uses the same code that is called from the hard-reset flow. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/device.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 6cbfd560721e..56d3ba53c661 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -513,11 +513,8 @@ disable_device: return rc; } -static void hl_device_hard_reset_pending(struct work_struct *work) +static void device_kill_open_processes(struct hl_device *hdev) { - struct hl_device_reset_work *device_reset_work = - container_of(work, struct hl_device_reset_work, reset_work); - struct hl_device *hdev = device_reset_work->hdev; u16 pending_total, pending_cnt; struct task_struct *task = NULL; @@ -552,6 +549,12 @@ static void hl_device_hard_reset_pending(struct work_struct *work) } } + /* We killed the open users, but because the driver cleans up after the + * user contexts are closed (e.g. mmu mappings), we need to wait again + * to make sure the cleaning phase is finished before continuing with + * the reset + */ + pending_cnt = pending_total; while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) { @@ -567,6 +570,16 @@ static void hl_device_hard_reset_pending(struct work_struct *work) mutex_unlock(&hdev->fd_open_cnt_lock); +} + +static void device_hard_reset_pending(struct work_struct *work) +{ + struct hl_device_reset_work *device_reset_work = + container_of(work, struct hl_device_reset_work, reset_work); + struct hl_device *hdev = device_reset_work->hdev; + + device_kill_open_processes(hdev); + hl_device_reset(hdev, true, true); kfree(device_reset_work); @@ -650,7 +663,7 @@ again: * from a dedicated work */ INIT_WORK(&device_reset_work->reset_work, - hl_device_hard_reset_pending); + device_hard_reset_pending); device_reset_work->hdev = hdev; schedule_work(&device_reset_work->reset_work); @@ -1049,6 +1062,15 @@ void hl_device_fini(struct hl_device *hdev) /* Mark device as disabled */ hdev->disabled = true; + /* + * Flush anyone that is inside the critical section of enqueue + * jobs to the H/W + */ + hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); + + device_kill_open_processes(hdev); + hl_hwmon_fini(hdev); device_late_fini(hdev); From 3f5398cfbf051dc1850b4f64fbe5267cbd699ce0 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 6 Apr 2019 15:41:35 +0300 Subject: [PATCH 30/31] habanalabs: improve IOCTLs behavior when disabled or reset This patch makes some improvement in how IOCTLs behave when the device is disabled or under reset. The new code checks, at the start of every IOCTL, if the device is disabled or in reset. If so, it prints an appropriate kernel message and returns -EBUSY to user-space. In addition, the code modifies the location of where the hard_reset_pending flag is being set or cleared: 1. It is now cleared immediately after the reset *tear-down* flow is finished but before the re-initialization flow begins. 2. It is being set in the remove function of the device, to make the behavior the same with the hard-reset flow There are two exceptions to the disable or in reset check: 1. The HL_INFO_DEVICE_STATUS opcode in the INFO IOCTL. This opcode allows the user to inquire about the status of the device, whether it is operational, in reset or malfunction (disabled). If the driver will block this IOCTL, the user won't be able to retrieve the status in case of malfunction or in reset. 2. The WAIT_FOR_CS IOCTL. This IOCTL allows the user to inquire about the status of a CS. We want to allow the user to continue to do so, even if we started a soft-reset process because it will allow the user to get the correct error code for each CS he submitted. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/command_buffer.c | 7 +++++++ drivers/misc/habanalabs/device.c | 9 +++++---- drivers/misc/habanalabs/habanalabs_ioctl.c | 3 ++- drivers/misc/habanalabs/memory.c | 3 ++- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/command_buffer.c index 85f75806a9a7..b1ffca47d748 100644 --- a/drivers/misc/habanalabs/command_buffer.c +++ b/drivers/misc/habanalabs/command_buffer.c @@ -214,6 +214,13 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) u64 handle; int rc; + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(hdev->dev, + "Device is %s. Can't execute CB IOCTL\n", + atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); + return -EBUSY; + } + switch (args->in.op) { case HL_CB_OP_CREATE: rc = hl_cb_create(hdev, &hpriv->cb_mgr, args->in.cb_size, diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 56d3ba53c661..25bfb093ff26 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -641,6 +641,8 @@ again: if ((hard_reset) && (!from_hard_reset_thread)) { struct hl_device_reset_work *device_reset_work; + hdev->hard_reset_pending = true; + if (!hdev->pdev) { dev_err(hdev->dev, "Reset action is NOT supported in simulator\n"); @@ -648,8 +650,6 @@ again: goto out_err; } - hdev->hard_reset_pending = true; - device_reset_work = kzalloc(sizeof(*device_reset_work), GFP_ATOMIC); if (!device_reset_work) { @@ -718,6 +718,7 @@ again: if (hard_reset) { hdev->device_cpu_disabled = false; + hdev->hard_reset_pending = false; if (hdev->kernel_ctx) { dev_crit(hdev->dev, @@ -779,8 +780,6 @@ again: } hl_set_max_power(hdev, hdev->max_power); - - hdev->hard_reset_pending = false; } else { rc = hdev->asic_funcs->soft_reset_late_init(hdev); if (rc) { @@ -1069,6 +1068,8 @@ void hl_device_fini(struct hl_device *hdev) hdev->asic_funcs->hw_queues_lock(hdev); hdev->asic_funcs->hw_queues_unlock(hdev); + hdev->hard_reset_pending = true; + device_kill_open_processes(hdev); hl_hwmon_fini(hdev); diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index 810e65446075..eeefb22023e9 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -202,7 +202,8 @@ static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) if (hl_device_disabled_or_in_reset(hdev)) { dev_warn_ratelimited(hdev->dev, - "Device is disabled or in reset. Can't execute INFO IOCTL\n"); + "Device is %s. Can't execute INFO IOCTL\n", + atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); return -EBUSY; } diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 05919f913300..43ef3ad8438a 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -1159,7 +1159,8 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) if (hl_device_disabled_or_in_reset(hdev)) { dev_warn_ratelimited(hdev->dev, - "Device is disabled or in reset. Can't execute memory IOCTL\n"); + "Device is %s. Can't execute MEMORY IOCTL\n", + atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); return -EBUSY; } From 9f201aba56b92c3daa4b76efae056ddbb80d91e6 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 6 Apr 2019 15:33:38 +0300 Subject: [PATCH 31/31] habanalabs: prevent device PTE read/write during hard-reset During hard-reset, contexts are closed as part of the tear-down process. After a context is closed, the driver cleans up the page tables of that context in the device's DRAM. This action is both dangerous and unnecessary. It is unnecessary, because the device is going through a hard-reset, which means the device's DRAM contents are no longer valid and the device's MMU is being reset. It is dangerous, because if the hard-reset came as a result of a PCI freeze, this action may cause the entire host machine to hang. Therefore, prevent all device PTE updates when a hard-reset operation is pending. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index aaa05662ca9f..5100dfbf3acc 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4058,6 +4058,9 @@ static u64 goya_read_pte(struct hl_device *hdev, u64 addr) { struct goya_device *goya = hdev->asic_specific; + if (hdev->hard_reset_pending) + return U64_MAX; + return readq(hdev->pcie_bar[DDR_BAR_ID] + (addr - goya->ddr_bar_cur_addr)); } @@ -4066,6 +4069,9 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val) { struct goya_device *goya = hdev->asic_specific; + if (hdev->hard_reset_pending) + return; + writeq(val, hdev->pcie_bar[DDR_BAR_ID] + (addr - goya->ddr_bar_cur_addr)); }