server: defer task when no slot is available
This commit is contained in:
parent
682986a08e
commit
bf0daf49d6
1 changed files with 9 additions and 2 deletions
|
@ -1558,6 +1558,7 @@ struct llama_server_context
|
||||||
void process_tasks()
|
void process_tasks()
|
||||||
{
|
{
|
||||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
std::vector<task_server> deferred_tasks;
|
||||||
while (!queue_tasks.empty())
|
while (!queue_tasks.empty())
|
||||||
{
|
{
|
||||||
task_server task = queue_tasks.front();
|
task_server task = queue_tasks.front();
|
||||||
|
@ -1569,8 +1570,8 @@ struct llama_server_context
|
||||||
if (slot == nullptr)
|
if (slot == nullptr)
|
||||||
{
|
{
|
||||||
LOG_TEE("slot unavailable\n");
|
LOG_TEE("slot unavailable\n");
|
||||||
// send error result
|
// if no slot is available, we defer this task for processing later
|
||||||
send_error(task, "slot unavailable");
|
deferred_tasks.push_back(task);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1616,6 +1617,12 @@ struct llama_server_context
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// add all the deferred tasks back to the queue
|
||||||
|
for (task_server &task : deferred_tasks)
|
||||||
|
{
|
||||||
|
queue_tasks.push_back(task);
|
||||||
|
}
|
||||||
|
|
||||||
// remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue
|
// remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue
|
||||||
std::vector<task_result> agg_results;
|
std::vector<task_result> agg_results;
|
||||||
auto queue_iterator = queue_multitasks.begin();
|
auto queue_iterator = queue_multitasks.begin();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue