This tag contains the following changes for 5.12-rc1:

- Improve communication protocol with device CPU CP application.
   The change prevents random (rare) out-of-sync errors.
 
 - Notify F/W to start sending events only after initialization of
   device is done. This fixes the issue where fatal events were received
   but ignored.
 
 - Fix integer handling (static analysis warning).
 
 - Always fetch HBM ECC errors from F/W (if available).
 
 - Minor fix in GAUDI-specific initialization code.
 -----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCgAxFiEE7TEboABC71LctBLFZR1NuKta54AFAmAhZjITHG9nYWJiYXlA
 a2VybmVsLm9yZwAKCRBlHU24q1rngLg4B/9LWydC5nBf1RrJb9hbJjWYHfJxtA+0
 SLwEnNCmUQ8UqfIsXEpzr0u1nZeali/jPXvH5ohkPAxahv83o4q2cVT3FdQbaHXP
 InsdtCjxole+Ne2hQpR7yWBZrqU8n5msN/e3OsRfNFGv5jmafIEhn9Ef+DMILiF2
 yt+yBdoQMHg0TZE/pZcGedFumf4tPMaQcp83krp7Ma4EzqDvMJwDexcfbpla2tuZ
 tnndKNYB60p6YAdfDmvD+7nXxpA5JnnkXcZNBBRw3gce62q90yt2bRTEEHXrkaOj
 lERhksXHy8VeMGuLfd6BebBzNOSjl2HN+LnSBwGUM6obnr6gt7df2Xn5
 =/k5r
 -----END PGP SIGNATURE-----

Merge tag 'misc-habanalabs-next-2021-02-08' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains the following changes for 5.12-rc1:

- Improve communication protocol with device CPU CP application.
  The change prevents random (rare) out-of-sync errors.

- Notify F/W to start sending events only after initialization of
  device is done. This fixes the issue where fatal events were received
  but ignored.

- Fix integer handling (static analysis warning).

- Always fetch HBM ECC errors from F/W (if available).

- Minor fix in GAUDI-specific initialization code.

* tag 'misc-habanalabs-next-2021-02-08' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
  habanalabs/gaudi: don't enable clock gating on DMA5
  habanalabs: return block size + block ID
  habanalabs: update security map after init CPU Qs
  habanalabs: enable F/W events after init done
  habanalabs/gaudi: use HBM_ECC_EN bit for ECC ERR
  habanalabs: support fetching first available user CQ
  habanalabs: improve communication protocol with cpucp
  habanalabs: fix integer handling issue
This commit is contained in:
Greg Kroah-Hartman 2021-02-09 09:33:51 +01:00
commit e3e3eaab2b
10 changed files with 149 additions and 40 deletions

View file

@ -1159,12 +1159,20 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
atomic_set(&hdev->in_reset, 0);
hdev->needs_reset = false;
if (hard_reset)
hdev->hard_reset_cnt++;
else
hdev->soft_reset_cnt++;
dev_notice(hdev->dev, "Successfully finished resetting the device\n");
dev_warn(hdev->dev, "Successfully finished resetting the device\n");
if (hard_reset) {
hdev->hard_reset_cnt++;
/* After reset is done, we are ready to receive events from
* the F/W. We can't do it before because we will ignore events
* and if those events are fatal, we won't know about it and
* the device will be operational although it shouldn't be
*/
hdev->asic_funcs->enable_events_from_fw(hdev);
} else {
hdev->soft_reset_cnt++;
}
return 0;
@ -1415,6 +1423,13 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->init_done = true;
/* After initialization is done, we are ready to receive events from
* the F/W. We can't do it before because we will ignore events and if
* those events are fatal, we won't know about it and the device will
* be operational although it shouldn't be
*/
hdev->asic_funcs->enable_events_from_fw(hdev);
return 0;
release_ctx:

View file

@ -90,9 +90,10 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
u16 len, u32 timeout, u64 *result)
{
struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr;
u32 tmp;
u32 tmp, expected_ack_val;
int rc = 0;
pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
@ -115,14 +116,23 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
goto out;
}
/* set fence to a non valid value */
pkt->fence = UINT_MAX;
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
if (rc) {
dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
goto out;
}
if (hdev->asic_prop.fw_app_security_map &
CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
expected_ack_val = queue->pi;
else
expected_ack_val = CPUCP_PACKET_FENCE_VAL;
rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
(tmp == CPUCP_PACKET_FENCE_VAL), 1000,
(tmp == expected_ack_val), 1000,
timeout, true);
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

View file

@ -411,6 +411,7 @@ struct hl_mmu_properties {
* @first_available_user_mon: first monitor available for the user
* @first_available_user_msix_interrupt: first available msix interrupt
* reserved for the user
* @first_available_cq: first available CQ for the user.
* @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues.
* @fw_security_disabled: true if security measures are disabled in firmware,
@ -473,6 +474,7 @@ struct asic_fixed_properties {
u16 first_available_user_sob[HL_MAX_DCORES];
u16 first_available_user_mon[HL_MAX_DCORES];
u16 first_available_user_msix_interrupt;
u16 first_available_cq[HL_MAX_DCORES];
u8 tpc_enabled_mask;
u8 completion_queues_count;
u8 fw_security_disabled;
@ -855,12 +857,18 @@ enum div_select_defs {
* and place them in the relevant cs jobs
* @collective_wait_create_jobs: allocate collective wait cs jobs
* @scramble_addr: Routine to scramble the address prior of mapping it
* in the MMU.
* in the MMU.
* @descramble_addr: Routine to de-scramble the address prior of
* showing it to users.
* showing it to users.
* @ack_protection_bits_errors: ack and dump all security violations
* @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it.
* also returns the size of the block if caller supplies
* a valid pointer for it
* @hw_block_mmap: mmap a HW block with a given id.
* @enable_events_from_fw: send interrupt to firmware to notify them the
* driver is ready to receive asynchronous events. This
* function should be called during the first init and
* after every hard-reset of the device
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@ -974,9 +982,10 @@ struct hl_asic_funcs {
u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
void (*ack_protection_bits_errors)(struct hl_device *hdev);
int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr,
u32 *block_id);
u32 *block_size, u32 *block_id);
int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
u32 block_id, u32 block_size);
void (*enable_events_from_fw)(struct hl_device *hdev);
};

View file

@ -397,7 +397,8 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
prop->first_available_user_sob[args->dcore_id];
sm_info.first_available_monitor =
prop->first_available_user_mon[args->dcore_id];
sm_info.first_available_cq =
prop->first_available_cq[args->dcore_id];
return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size,
sizeof(sm_info))) ? -EFAULT : 0;

View file

@ -1289,12 +1289,13 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
return rc;
}
static int map_block(struct hl_device *hdev, u64 address, u64 *handle)
static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
u32 *size)
{
u32 block_id = 0;
int rc;
rc = hdev->asic_funcs->get_hw_block_id(hdev, address, &block_id);
rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
*handle = block_id | HL_MMAP_TYPE_BLOCK;
*handle <<= PAGE_SHIFT;
@ -1371,7 +1372,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
u64 block_handle, device_addr = 0;
u32 handle = 0;
u32 handle = 0, block_size;
int rc;
switch (args->in.op) {
@ -1416,8 +1417,9 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
case HL_MEM_OP_MAP_BLOCK:
rc = map_block(hdev, args->in.map_block.block_addr,
&block_handle);
args->out.handle = block_handle;
&block_handle, &block_size);
args->out.block_handle = block_handle;
args->out.block_size = block_size;
break;
default:
@ -1437,7 +1439,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
u64 block_handle, device_addr = 0;
u32 handle = 0;
u32 handle = 0, block_size;
int rc;
if (!hl_device_operational(hdev, &status)) {
@ -1524,8 +1526,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
case HL_MEM_OP_MAP_BLOCK:
rc = map_block(hdev, args->in.map_block.block_addr,
&block_handle);
args->out.handle = block_handle;
&block_handle, &block_size);
args->out.block_handle = block_handle;
args->out.block_size = block_size;
break;
default:

View file

@ -507,7 +507,7 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
p = (char *)p + hop0_shift_off;
p = (char *)p + ((hops->used_hops - 1) * sizeof(u64));
hop_shift = *(u64 *)p;
offset_mask = (1 << hop_shift) - 1;
offset_mask = (1ull << hop_shift) - 1;
addr_mask = ~(offset_mask);
*phys_addr = (tmp_phys_addr & addr_mask) |
(virt_addr & offset_mask);

View file

@ -529,6 +529,9 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->first_available_user_msix_interrupt = USHRT_MAX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
/* disable fw security for now, set it in a later stage */
prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
@ -1379,8 +1382,6 @@ static int gaudi_late_init(struct hl_device *hdev)
return rc;
}
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
rc = gaudi_fetch_psoc_frequency(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
@ -3459,6 +3460,12 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
enable = !!(hdev->clock_gating_mask &
(BIT_ULL(gaudi_dma_assignment[i])));
/* GC sends work to DMA engine through Upper CP in DMA5 so
* we need to not enable clock gating in that DMA
*/
if (i == GAUDI_HBM_DMA_4)
enable = 0;
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
enable ? QMAN_CGM1_PWR_GATE_EN : 0);
@ -3725,6 +3732,7 @@ static int gaudi_init_cpu(struct hl_device *hdev)
static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_eq *eq;
u32 status;
struct hl_hw_queue *cpu_pq =
@ -3781,6 +3789,10 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
return -EIO;
}
/* update FW application security bits */
if (prop->fw_security_status_valid)
prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
return 0;
}
@ -4438,9 +4450,12 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
/* ring the doorbell */
WREG32(db_reg_offset, db_value);
if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
/* make sure device CPU will read latest data from host */
mb();
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GAUDI_EVENT_PI_UPDATE);
}
}
static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
@ -7098,7 +7113,9 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
int err = 0;
if (!hdev->asic_prop.fw_security_disabled) {
if (hdev->asic_prop.fw_security_status_valid &&
(hdev->asic_prop.fw_app_security_map &
CPU_BOOT_DEV_STS0_HBM_ECC_EN)) {
if (!hbm_ecc_data) {
dev_err(hdev->dev, "No FW ECC data");
return 0;
@ -7120,14 +7137,24 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
dev_err(hdev->dev,
"HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
device, ch, type, wr_par, rd_par, ca_par, serr, derr);
"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
device, ch, wr_par, rd_par, ca_par, serr, derr);
dev_err(hdev->dev,
"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
device, ch, hbm_ecc_data->first_addr, type,
hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
hbm_ecc_data->dec_cnt);
err = 1;
return 0;
}
if (!hdev->asic_prop.fw_security_disabled) {
dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
return 0;
}
base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
@ -8469,7 +8496,7 @@ static u64 gaudi_get_device_time(struct hl_device *hdev)
}
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
u32 *block_id)
u32 *block_size, u32 *block_id)
{
return -EPERM;
}
@ -8481,6 +8508,11 @@ static int gaudi_block_mmap(struct hl_device *hdev,
return -EPERM;
}
static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
}
static const struct hl_asic_funcs gaudi_funcs = {
.early_init = gaudi_early_init,
.early_fini = gaudi_early_fini,
@ -8562,7 +8594,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
.descramble_addr = hl_mmu_descramble_addr,
.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
.get_hw_block_id = gaudi_get_hw_block_id,
.hw_block_mmap = gaudi_block_mmap
.hw_block_mmap = gaudi_block_mmap,
.enable_events_from_fw = gaudi_enable_events_from_fw
};
/**

View file

@ -457,6 +457,9 @@ int goya_get_fixed_properties(struct hl_device *hdev)
prop->first_available_user_msix_interrupt = USHRT_MAX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
/* disable fw security for now, set it in a later stage */
prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
@ -794,9 +797,6 @@ int goya_late_init(struct hl_device *hdev)
return rc;
}
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
return 0;
}
@ -1188,6 +1188,7 @@ static int goya_stop_external_queues(struct hl_device *hdev)
int goya_init_cpu_queues(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_eq *eq;
u32 status;
struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
@ -1240,6 +1241,10 @@ int goya_init_cpu_queues(struct hl_device *hdev)
return -EIO;
}
/* update FW application security bits */
if (prop->fw_security_status_valid)
prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
goya->hw_cap_initialized |= HW_CAP_CPU_Q;
return 0;
}
@ -2806,9 +2811,12 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
/* ring the doorbell */
WREG32(db_reg_offset, db_value);
if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) {
/* make sure device CPU will read latest data from host */
mb();
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_PI_UPDATE);
}
}
void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
@ -5382,7 +5390,7 @@ static void goya_ctx_fini(struct hl_ctx *ctx)
}
static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
u32 *block_id)
u32 *block_size, u32 *block_id)
{
return -EPERM;
}
@ -5393,6 +5401,12 @@ static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
return -EPERM;
}
static void goya_enable_events_from_fw(struct hl_device *hdev)
{
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
}
static const struct hl_asic_funcs goya_funcs = {
.early_init = goya_early_init,
.early_fini = goya_early_fini,
@ -5474,7 +5488,8 @@ static const struct hl_asic_funcs goya_funcs = {
.descramble_addr = hl_mmu_descramble_addr,
.ack_protection_bits_errors = goya_ack_protection_bits_errors,
.get_hw_block_id = goya_get_hw_block_id,
.hw_block_mmap = goya_block_mmap
.hw_block_mmap = goya_block_mmap,
.enable_events_from_fw = goya_enable_events_from_fw
};
/*

View file

@ -166,6 +166,10 @@
* FW handles HBM ECC indications.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN Packets ack value used in the armcpd
* is set to the PI counter.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_ENABLED Device status register enabled.
* This is a main indication that the
* running FW populates the device status
@ -190,6 +194,7 @@
#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << 12)
#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13)
#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << 14)
#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << 15)
#define CPU_BOOT_DEV_STS0_ENABLED (1 << 31)
enum cpu_boot_status {

View file

@ -414,10 +414,13 @@ struct hl_pll_frequency_info {
* struct hl_info_sync_manager - sync manager information
* @first_available_sync_object: first available sob
* @first_available_monitor: first available monitor
* @first_available_cq: first available cq
*/
struct hl_info_sync_manager {
__u32 first_available_sync_object;
__u32 first_available_monitor;
__u32 first_available_cq;
__u32 reserved;
};
/**
@ -779,10 +782,10 @@ struct hl_mem_in {
/* HL_MEM_OP_MAP_BLOCK - map a hw block */
struct {
/*
* HW block address to map, a handle will be returned
* to the user and will be used to mmap the relevant
* block. Only addresses from configuration space are
* allowed.
* HW block address to map, a handle and size will be
* returned to the user and will be used to mmap the
* relevant block. Only addresses from configuration
* space are allowed.
*/
__u64 block_addr;
} map_block;
@ -813,11 +816,26 @@ struct hl_mem_out {
__u64 device_virt_addr;
/*
* Used for HL_MEM_OP_ALLOC and HL_MEM_OP_MAP_BLOCK.
* Used in HL_MEM_OP_ALLOC
* This is the assigned handle for the allocated memory
* or mapped block
*/
__u64 handle;
struct {
/*
* Used in HL_MEM_OP_MAP_BLOCK.
* This is the assigned handle for the mapped block
*/
__u64 block_handle;
/*
* Used in HL_MEM_OP_MAP_BLOCK
* This is the size of the mapped block
*/
__u32 block_size;
__u32 pad;
};
};
};