linux-stable/drivers/accel/habanalabs/common/irq.c
farah kassabri ba24b5ec78 accel/habanalabs: split user interrupts pending list
Currently driver maintain one list for both pending user interrupts
which seeks to wait till CQ reaches it's target value and also the ones
that seeks to get timestamp records when the CQ reaches it's target
value.
This causes delay in handling the waiters which gets higher priority
than the timestamp records.
In order to solve this, let's split the list into two,
one for each case and each one is protected by it's own spinlock.
Waiters will be handled within the interrupt context first,
then the timestamp records will be set.
Freeing the timestamp related memory will be handled in a workqueue.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2023-10-09 12:37:22 +03:00

697 lines
18 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "habanalabs.h"
#include <linux/slab.h>
/**
* struct hl_eqe_work - This structure is used to schedule work of EQ
* entry and cpucp_reset event
*
* @eq_work: workqueue object to run when EQ entry is received
* @hdev: pointer to device structure
* @eq_entry: copy of the EQ entry
*/
struct hl_eqe_work {
struct work_struct eq_work;
struct hl_device *hdev;
struct hl_eq_entry eq_entry;
};
/**
* hl_cq_inc_ptr - increment ci or pi of cq
*
* @ptr: the current ci or pi value of the completion queue
*
* Increment ptr by 1. If it reaches the number of completion queue
* entries, set it to 0
*/
inline u32 hl_cq_inc_ptr(u32 ptr)
{
ptr++;
if (unlikely(ptr == HL_CQ_LENGTH))
ptr = 0;
return ptr;
}
/**
* hl_eq_inc_ptr - increment ci of eq
*
* @ptr: the current ci value of the event queue
*
* Increment ptr by 1. If it reaches the number of event queue
* entries, set it to 0
*/
static inline u32 hl_eq_inc_ptr(u32 ptr)
{
ptr++;
if (unlikely(ptr == HL_EQ_LENGTH))
ptr = 0;
return ptr;
}
static void irq_handle_eqe(struct work_struct *work)
{
struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work,
eq_work);
struct hl_device *hdev = eqe_work->hdev;
hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry);
kfree(eqe_work);
}
/**
* job_finish - queue job finish work
*
* @hdev: pointer to device structure
* @cs_seq: command submission sequence
* @cq: completion queue
* @timestamp: interrupt timestamp
*
*/
static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq, ktime_t timestamp)
{
struct hl_hw_queue *queue;
struct hl_cs_job *job;
queue = &hdev->kernel_queues[cq->hw_queue_id];
job = queue->shadow_queue[hl_pi_2_offset(cs_seq)];
job->timestamp = timestamp;
queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
atomic_inc(&queue->ci);
}
/**
* cs_finish - queue all cs jobs finish work
*
* @hdev: pointer to device structure
* @cs_seq: command submission sequence
* @timestamp: interrupt timestamp
*
*/
static void cs_finish(struct hl_device *hdev, u16 cs_seq, ktime_t timestamp)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_hw_queue *queue;
struct hl_cs *cs;
struct hl_cs_job *job;
cs = hdev->shadow_cs_queue[cs_seq & (prop->max_pending_cs - 1)];
if (!cs) {
dev_warn(hdev->dev,
"No pointer to CS in shadow array at index %d\n",
cs_seq);
return;
}
list_for_each_entry(job, &cs->job_list, cs_node) {
queue = &hdev->kernel_queues[job->hw_queue_id];
atomic_inc(&queue->ci);
}
cs->completion_timestamp = timestamp;
queue_work(hdev->cs_cmplt_wq, &cs->finish_work);
}
/**
* hl_irq_handler_cq - irq handler for completion queue
*
* @irq: irq number
* @arg: pointer to completion queue structure
*
*/
irqreturn_t hl_irq_handler_cq(int irq, void *arg)
{
struct hl_cq *cq = arg;
struct hl_device *hdev = cq->hdev;
bool shadow_index_valid, entry_ready;
u16 shadow_index;
struct hl_cq_entry *cq_entry, *cq_base;
ktime_t timestamp = ktime_get();
if (hdev->disabled) {
dev_dbg(hdev->dev,
"Device disabled but received IRQ %d for CQ %d\n",
irq, cq->hw_queue_id);
return IRQ_HANDLED;
}
cq_base = cq->kernel_address;
while (1) {
cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci];
entry_ready = !!FIELD_GET(CQ_ENTRY_READY_MASK,
le32_to_cpu(cq_entry->data));
if (!entry_ready)
break;
/* Make sure we read CQ entry contents after we've
* checked the ownership bit.
*/
dma_rmb();
shadow_index_valid =
!!FIELD_GET(CQ_ENTRY_SHADOW_INDEX_VALID_MASK,
le32_to_cpu(cq_entry->data));
shadow_index = FIELD_GET(CQ_ENTRY_SHADOW_INDEX_MASK,
le32_to_cpu(cq_entry->data));
/*
* CQ interrupt handler has 2 modes of operation:
* 1. Interrupt per CS completion: (Single CQ for all queues)
* CQ entry represents a completed CS
*
* 2. Interrupt per CS job completion in queue: (CQ per queue)
* CQ entry represents a completed job in a certain queue
*/
if (shadow_index_valid && !hdev->disabled) {
if (hdev->asic_prop.completion_mode ==
HL_COMPLETION_MODE_CS)
cs_finish(hdev, shadow_index, timestamp);
else
job_finish(hdev, shadow_index, cq, timestamp);
}
/* Clear CQ entry ready bit */
cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
~CQ_ENTRY_READY_MASK);
cq->ci = hl_cq_inc_ptr(cq->ci);
/* Increment free slots */
atomic_inc(&cq->free_slots_cnt);
}
return IRQ_HANDLED;
}
/*
* hl_ts_free_objects - handler of the free objects workqueue.
* This function should put refcount to objects that the registration node
* took refcount to them.
* @work: workqueue object pointer
*/
static void hl_ts_free_objects(struct work_struct *work)
{
struct timestamp_reg_work_obj *job =
container_of(work, struct timestamp_reg_work_obj, free_obj);
struct list_head *dynamic_alloc_free_list_head = job->dynamic_alloc_free_obj_head;
struct timestamp_reg_free_node *free_obj, *temp_free_obj;
struct list_head *free_list_head = job->free_obj_head;
struct hl_device *hdev = job->hdev;
list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
dev_dbg(hdev->dev, "About to put refcount to buf (%p) cq_cb(%p)\n",
free_obj->buf,
free_obj->cq_cb);
hl_mmap_mem_buf_put(free_obj->buf);
hl_cb_put(free_obj->cq_cb);
atomic_set(&free_obj->in_use, 0);
}
kfree(free_list_head);
if (dynamic_alloc_free_list_head) {
list_for_each_entry_safe(free_obj, temp_free_obj, dynamic_alloc_free_list_head,
free_objects_node) {
dev_dbg(hdev->dev,
"Dynamic_Alloc list: About to put refcount to buf (%p) cq_cb(%p)\n",
free_obj->buf,
free_obj->cq_cb);
hl_mmap_mem_buf_put(free_obj->buf);
hl_cb_put(free_obj->cq_cb);
list_del(&free_obj->free_objects_node);
kfree(free_obj);
}
kfree(dynamic_alloc_free_list_head);
}
kfree(job);
}
/*
* This function called with spin_lock of wait_list_lock taken
* This function will set timestamp and delete the registration node from the
* wait_list_lock.
* and since we're protected with spin_lock here, so we cannot just put the refcount
* for the objects here, since the release function may be called and it's also a long
* logic (which might sleep also) that cannot be handled in irq context.
* so here we'll be filling a list with nodes of "put" jobs and then will send this
* list to a dedicated workqueue to do the actual put.
*/
static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
struct list_head **free_list,
struct list_head **dynamic_alloc_list,
struct hl_user_interrupt *intr)
{
struct hl_ts_free_jobs *ts_free_jobs_data;
struct timestamp_reg_free_node *free_node;
u32 free_node_index;
u64 timestamp;
ts_free_jobs_data = &intr->ts_free_jobs_data;
free_node_index = ts_free_jobs_data->next_avail_free_node_idx;
if (!(*free_list)) {
/* Alloc/Init the timestamp registration free objects list */
*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
if (!(*free_list))
return -ENOMEM;
INIT_LIST_HEAD(*free_list);
}
free_node = &ts_free_jobs_data->free_nodes_pool[free_node_index];
if (atomic_cmpxchg(&free_node->in_use, 0, 1)) {
dev_dbg(hdev->dev,
"Timestamp free node pool is full, buff: %p, record: %p, irq: %u\n",
pend->ts_reg_info.buf,
pend,
intr->interrupt_id);
if (!(*dynamic_alloc_list)) {
*dynamic_alloc_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
if (!(*dynamic_alloc_list))
return -ENOMEM;
INIT_LIST_HEAD(*dynamic_alloc_list);
}
free_node = kmalloc(sizeof(struct timestamp_reg_free_node), GFP_ATOMIC);
if (!free_node)
return -ENOMEM;
free_node->dynamic_alloc = 1;
}
timestamp = ktime_to_ns(intr->timestamp);
*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
pend, pend->ts_reg_info.timestamp_kernel_addr, intr->interrupt_id);
list_del(&pend->list_node);
/* Putting the refcount for ts_buff and cq_cb objects will be handled
* in workqueue context, just add job to free_list.
*/
free_node->buf = pend->ts_reg_info.buf;
free_node->cq_cb = pend->ts_reg_info.cq_cb;
if (free_node->dynamic_alloc) {
list_add(&free_node->free_objects_node, *dynamic_alloc_list);
} else {
ts_free_jobs_data->next_avail_free_node_idx =
(++free_node_index) % ts_free_jobs_data->free_nodes_length;
list_add(&free_node->free_objects_node, *free_list);
}
/* Mark TS record as free */
pend->ts_reg_info.in_use = false;
return 0;
}
static void handle_user_interrupt_ts_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
{
struct list_head *ts_reg_free_list_head = NULL, *dynamic_alloc_list_head = NULL;
struct hl_user_pending_interrupt *pend, *temp_pend;
struct timestamp_reg_work_obj *job;
bool reg_node_handle_fail = false;
unsigned long flags;
int rc;
/* For registration nodes:
* As part of handling the registration nodes, we should put refcount to
* some objects. the problem is that we cannot do that under spinlock
* or in irq handler context at all (since release functions are long and
* might sleep), so we will need to handle that part in workqueue context.
* To avoid handling kmalloc failure which compels us rolling back actions
* and move nodes hanged on the free list back to the interrupt ts list
* we always alloc the job of the WQ at the beginning.
*/
job = kmalloc(sizeof(*job), GFP_ATOMIC);
if (!job)
return;
spin_lock_irqsave(&intr->ts_list_lock, flags);
list_for_each_entry_safe(pend, temp_pend, &intr->ts_list_head, list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) {
if (!reg_node_handle_fail) {
rc = handle_registration_node(hdev, pend,
&ts_reg_free_list_head,
&dynamic_alloc_list_head, intr);
if (rc)
reg_node_handle_fail = true;
}
}
}
spin_unlock_irqrestore(&intr->ts_list_lock, flags);
if (ts_reg_free_list_head) {
INIT_WORK(&job->free_obj, hl_ts_free_objects);
job->free_obj_head = ts_reg_free_list_head;
job->dynamic_alloc_free_obj_head = dynamic_alloc_list_head;
job->hdev = hdev;
queue_work(hdev->ts_free_obj_wq, &job->free_obj);
} else {
kfree(job);
}
}
static void handle_user_interrupt_wait_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
{
struct hl_user_pending_interrupt *pend, *temp_pend;
unsigned long flags;
spin_lock_irqsave(&intr->wait_list_lock, flags);
list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) {
/* Handle wait target value node */
pend->fence.timestamp = intr->timestamp;
complete_all(&pend->fence.completion);
}
}
spin_unlock_irqrestore(&intr->wait_list_lock, flags);
}
static void handle_tpc_interrupt(struct hl_device *hdev)
{
u64 event_mask;
u32 flags;
event_mask = HL_NOTIFIER_EVENT_TPC_ASSERT |
HL_NOTIFIER_EVENT_USER_ENGINE_ERR |
HL_NOTIFIER_EVENT_DEVICE_RESET;
flags = HL_DRV_RESET_DELAY;
dev_err_ratelimited(hdev->dev, "Received TPC assert\n");
hl_device_cond_reset(hdev, flags, event_mask);
}
static void handle_unexpected_user_interrupt(struct hl_device *hdev)
{
dev_err_ratelimited(hdev->dev, "Received unexpected user error interrupt\n");
}
/**
* hl_irq_user_interrupt_handler - irq handler for user interrupts.
*
* @irq: irq number
* @arg: pointer to user interrupt structure
*/
irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg)
{
struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev;
user_int->timestamp = ktime_get();
switch (user_int->type) {
case HL_USR_INTERRUPT_CQ:
/* First handle user waiters threads */
handle_user_interrupt_wait_list(hdev, &hdev->common_user_cq_interrupt);
handle_user_interrupt_wait_list(hdev, user_int);
/* Second handle user timestamp registrations */
handle_user_interrupt_ts_list(hdev, &hdev->common_user_cq_interrupt);
handle_user_interrupt_ts_list(hdev, user_int);
break;
case HL_USR_INTERRUPT_DECODER:
handle_user_interrupt_wait_list(hdev, &hdev->common_decoder_interrupt);
/* Handle decoder interrupt registered on this specific irq */
handle_user_interrupt_wait_list(hdev, user_int);
break;
default:
break;
}
return IRQ_HANDLED;
}
/**
* hl_irq_user_interrupt_thread_handler - irq thread handler for user interrupts.
* This function is invoked by threaded irq mechanism
*
* @irq: irq number
* @arg: pointer to user interrupt structure
*
*/
irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
{
struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev;
user_int->timestamp = ktime_get();
switch (user_int->type) {
case HL_USR_INTERRUPT_TPC:
handle_tpc_interrupt(hdev);
break;
case HL_USR_INTERRUPT_UNEXPECTED:
handle_unexpected_user_interrupt(hdev);
break;
default:
break;
}
return IRQ_HANDLED;
}
irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg)
{
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
struct hl_device *hdev = arg;
dev_err(hdev->dev, "EQ error interrupt received\n");
hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
return IRQ_HANDLED;
}
/**
* hl_irq_handler_eq - irq handler for event queue
*
* @irq: irq number
* @arg: pointer to event queue structure
*
*/
irqreturn_t hl_irq_handler_eq(int irq, void *arg)
{
struct hl_eq *eq = arg;
struct hl_device *hdev = eq->hdev;
struct hl_eq_entry *eq_entry;
struct hl_eq_entry *eq_base;
struct hl_eqe_work *handle_eqe_work;
bool entry_ready;
u32 cur_eqe, ctl;
u16 cur_eqe_index, event_type;
eq_base = eq->kernel_address;
while (1) {
cur_eqe = le32_to_cpu(eq_base[eq->ci].hdr.ctl);
entry_ready = !!FIELD_GET(EQ_CTL_READY_MASK, cur_eqe);
if (!entry_ready)
break;
cur_eqe_index = FIELD_GET(EQ_CTL_INDEX_MASK, cur_eqe);
if ((hdev->event_queue.check_eqe_index) &&
(((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK) != cur_eqe_index)) {
dev_err(hdev->dev,
"EQE %#x in queue is ready but index does not match %d!=%d",
cur_eqe,
((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK),
cur_eqe_index);
break;
}
eq->prev_eqe_index++;
eq_entry = &eq_base[eq->ci];
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
*/
dma_rmb();
if (hdev->disabled && !hdev->reset_info.in_compute_reset) {
ctl = le32_to_cpu(eq_entry->hdr.ctl);
event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
dev_warn(hdev->dev,
"Device disabled but received an EQ event (%u)\n", event_type);
goto skip_irq;
}
handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC);
if (handle_eqe_work) {
INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe);
handle_eqe_work->hdev = hdev;
memcpy(&handle_eqe_work->eq_entry, eq_entry,
sizeof(*eq_entry));
queue_work(hdev->eq_wq, &handle_eqe_work->eq_work);
}
skip_irq:
/* Clear EQ entry ready bit */
eq_entry->hdr.ctl =
cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) &
~EQ_CTL_READY_MASK);
eq->ci = hl_eq_inc_ptr(eq->ci);
hdev->asic_funcs->update_eq_ci(hdev, eq->ci);
}
return IRQ_HANDLED;
}
/**
* hl_irq_handler_dec_abnrm - Decoder error interrupt handler
* @irq: IRQ number
* @arg: pointer to decoder structure.
*/
irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg)
{
struct hl_dec *dec = arg;
schedule_work(&dec->abnrm_intr_work);
return IRQ_HANDLED;
}
/**
* hl_cq_init - main initialization function for an cq object
*
* @hdev: pointer to device structure
* @q: pointer to cq structure
* @hw_queue_id: The H/W queue ID this completion queue belongs to
* HL_INVALID_QUEUE if cq is not attached to any specific queue
*
* Allocate dma-able memory for the completion queue and initialize fields
* Returns 0 on success
*/
int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
{
void *p;
p = hl_asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES, &q->bus_address,
GFP_KERNEL | __GFP_ZERO);
if (!p)
return -ENOMEM;
q->hdev = hdev;
q->kernel_address = p;
q->hw_queue_id = hw_queue_id;
q->ci = 0;
q->pi = 0;
atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
return 0;
}
/**
* hl_cq_fini - destroy completion queue
*
* @hdev: pointer to device structure
* @q: pointer to cq structure
*
* Free the completion queue memory
*/
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
{
hl_asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES, q->kernel_address, q->bus_address);
}
void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q)
{
q->ci = 0;
q->pi = 0;
atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
/*
* It's not enough to just reset the PI/CI because the H/W may have
* written valid completion entries before it was halted and therefore
* we need to clean the actual queues so we won't process old entries
* when the device is operational again
*/
memset(q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES);
}
/**
* hl_eq_init - main initialization function for an event queue object
*
* @hdev: pointer to device structure
* @q: pointer to eq structure
*
* Allocate dma-able memory for the event queue and initialize fields
* Returns 0 on success
*/
int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
{
void *p;
p = hl_cpu_accessible_dma_pool_alloc(hdev, HL_EQ_SIZE_IN_BYTES, &q->bus_address);
if (!p)
return -ENOMEM;
q->hdev = hdev;
q->kernel_address = p;
q->ci = 0;
q->prev_eqe_index = 0;
return 0;
}
/**
* hl_eq_fini - destroy event queue
*
* @hdev: pointer to device structure
* @q: pointer to eq structure
*
* Free the event queue memory
*/
void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
{
flush_workqueue(hdev->eq_wq);
hl_cpu_accessible_dma_pool_free(hdev, HL_EQ_SIZE_IN_BYTES, q->kernel_address);
}
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
{
q->ci = 0;
q->prev_eqe_index = 0;
/*
* It's not enough to just reset the PI/CI because the H/W may have
* written valid completion entries before it was halted and therefore
* we need to clean the actual queues so we won't process old entries
* when the device is operational again
*/
memset(q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES);
}