server: only add back deferred tasks when one slot is available
This commit is contained in:
parent
1bd867894d
commit
d083c81761
2 changed files with 12 additions and 10 deletions
|
@ -1383,6 +1383,7 @@ struct llama_server_context
|
||||||
slot.t_last_used = ggml_time_us();
|
slot.t_last_used = ggml_time_us();
|
||||||
|
|
||||||
LOG_TEE("slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
LOG_TEE("slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
||||||
|
queue_tasks.notify_slot_changed();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -209,7 +209,7 @@ struct llama_server_queue {
|
||||||
return task.id;
|
return task.id;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add a new task, but defer until the next loop
|
// Add a new task, but defer until one slot is available
|
||||||
void defer(task_server task) {
|
void defer(task_server task) {
|
||||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
queue_tasks_deferred.push_back(std::move(task));
|
queue_tasks_deferred.push_back(std::move(task));
|
||||||
|
@ -236,6 +236,16 @@ struct llama_server_queue {
|
||||||
callback_all_task_finished = callback;
|
callback_all_task_finished = callback;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Call when the state of one slot is changed
|
||||||
|
void notify_slot_changed() {
|
||||||
|
// move deferred tasks back to main loop
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
for (auto & task : queue_tasks_deferred) {
|
||||||
|
queue_tasks.push_back(std::move(task));
|
||||||
|
}
|
||||||
|
queue_tasks_deferred.clear();
|
||||||
|
}
|
||||||
|
|
||||||
// Start the main loop. This call is blocking
|
// Start the main loop. This call is blocking
|
||||||
void start_loop() {
|
void start_loop() {
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -255,15 +265,6 @@ struct llama_server_queue {
|
||||||
LOG_VERBOSE("callback_new_task", {});
|
LOG_VERBOSE("callback_new_task", {});
|
||||||
callback_new_task(task);
|
callback_new_task(task);
|
||||||
}
|
}
|
||||||
// move deferred tasks back to main loop
|
|
||||||
{
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
|
||||||
for (auto & task : queue_tasks_deferred) {
|
|
||||||
queue_tasks.push_back(std::move(task));
|
|
||||||
}
|
|
||||||
queue_tasks_deferred.clear();
|
|
||||||
lock.unlock();
|
|
||||||
}
|
|
||||||
LOG_VERBOSE("callback_all_task_finished", {});
|
LOG_VERBOSE("callback_all_task_finished", {});
|
||||||
// process and update all the multitasks
|
// process and update all the multitasks
|
||||||
auto queue_iterator = queue_multitasks.begin();
|
auto queue_iterator = queue_multitasks.begin();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue