server: defer task when no slot is available

This commit is contained in:
Xuan Son Nguyen 2024-01-18 14:19:52 +01:00
parent 682986a08e
commit bf0daf49d6

View file

@ -1558,6 +1558,7 @@ struct llama_server_context
void process_tasks()
{
std::unique_lock<std::mutex> lock(mutex_tasks);
std::vector<task_server> deferred_tasks;
while (!queue_tasks.empty())
{
task_server task = queue_tasks.front();
@ -1569,8 +1570,8 @@ struct llama_server_context
if (slot == nullptr)
{
LOG_TEE("slot unavailable\n");
// send error result
send_error(task, "slot unavailable");
// if no slot is available, we defer this task for processing later
deferred_tasks.push_back(task);
break;
}
@ -1616,6 +1617,12 @@ struct llama_server_context
}
}
// add all the deferred tasks back to the queue
for (task_server &task : deferred_tasks)
{
queue_tasks.push_back(task);
}
// remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue
std::vector<task_result> agg_results;
auto queue_iterator = queue_multitasks.begin();