habanalabs: add support for new cpucp return codes

Firmware now responds with more detailed cpucp return codes.
The driver can now distinguish between error and debug return codes.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ofir Bitton 2022-08-23 16:58:38 +03:00 committed by Oded Gabbay
parent a0fc8688c0
commit 0626fa1a4d
4 changed files with 51 additions and 4 deletions

View File

@ -252,7 +252,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr;
struct hl_bd *sent_bd;
u32 tmp, expected_ack_val, pi;
u32 tmp, expected_ack_val, pi, opcode;
int rc;
pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr);
@ -319,8 +319,35 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
if (rc) {
dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
rc, (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT);
opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT;
if (!prop->supports_advanced_cpucp_rc) {
dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode);
goto scrub_descriptor;
}
switch (rc) {
case cpucp_packet_invalid:
dev_err(hdev->dev,
"CPU packet %d is not supported by F/W\n", opcode);
break;
case cpucp_packet_fault:
dev_err(hdev->dev,
"F/W failed processing CPU packet %d\n", opcode);
break;
case cpucp_packet_invalid_pkt:
dev_dbg(hdev->dev,
"CPU packet %d is not supported by F/W\n", opcode);
break;
case cpucp_packet_invalid_params:
dev_err(hdev->dev,
"F/W reports invalid parameters for CPU packet %d\n", opcode);
break;
default:
dev_err(hdev->dev,
"Unknown F/W ERROR %d for CPU packet %d\n", rc, opcode);
}
/* propagate the return code from the f/w to the callers who want to check it */
if (result)
@ -332,6 +359,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
*result = le64_to_cpu(pkt->result);
}
scrub_descriptor:
/* Scrub previous buffer descriptor 'ctl' field which contains the
* previous PI value written during packet submission.
* We must do this or else F/W can read an old value upon queue wraparound.

View File

@ -678,6 +678,7 @@ struct hl_hints_range {
* @set_max_power_on_device_init: true if need to set max power in F/W on device init.
* @supports_user_set_page_size: true if user can set the allocation page size.
* @dma_mask: the dma mask to be set for this device
* @supports_advanced_cpucp_rc: true if the F/W reports the advanced cpucp return codes.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@ -785,6 +786,7 @@ struct asic_fixed_properties {
u8 set_max_power_on_device_init;
u8 supports_user_set_page_size;
u8 dma_mask;
u8 supports_advanced_cpucp_rc;
};
/**

View File

@ -2721,6 +2721,8 @@ static int gaudi2_late_init(struct hl_device *hdev)
struct gaudi2_device *gaudi2 = hdev->asic_specific;
int rc;
hdev->asic_prop.supports_advanced_cpucp_rc = true;
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
gaudi2->virt_msix_db_dma_addr);
if (rc) {

View File

@ -824,10 +824,25 @@ enum cpucp_led_index {
CPUCP_LED2_INDEX
};
/*
 * enum cpucp_packet_rc - Error return code
 * @cpucp_packet_success -> in case of success.
 * @cpucp_packet_invalid -> this is to support Goya and Gaudi platform.
 * @cpucp_packet_fault -> in case of processing error like failing to
 *                        get device binding or semaphore etc.
 * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. This is
 *                              supported Greco onwards.
 * @cpucp_packet_invalid_params -> when checking parameter like length of
 *                                 buffer or attribute value etc. Supported
 *                                 Greco onwards.
 * @cpucp_packet_rc_max -> It indicates size of enum so should be at last.
 *
 * No enumerator has an explicit value, so the values are assigned in
 * declaration order starting from 0 (cpucp_packet_success == 0).
 */
enum cpucp_packet_rc {
	cpucp_packet_success,
	cpucp_packet_invalid,
	cpucp_packet_fault,
	cpucp_packet_invalid_pkt,
	cpucp_packet_invalid_params,
	cpucp_packet_rc_max
};
/*