pop_deferred_task

This commit is contained in:
Xuan Son Nguyen 2024-09-03 10:34:58 +02:00
parent 446d57d7cd
commit ec882cc1ef

View file

@ -50,6 +50,7 @@ enum stop_type {
STOP_TYPE_PARTIAL, STOP_TYPE_PARTIAL,
}; };
// state diagram: https://github.com/ggerganov/llama.cpp/pull/9283
enum slot_state { enum slot_state {
SLOT_STATE_IDLE, SLOT_STATE_IDLE,
SLOT_STATE_PROCESSING_PROMPT, SLOT_STATE_PROCESSING_PROMPT,
@ -251,7 +252,6 @@ struct server_slot {
{"truncated", truncated} {"truncated", truncated}
}); });
callback_on_release(id); callback_on_release(id);
// queue_tasks.notify_slot_changed();
} }
} }
@ -456,14 +456,15 @@ struct server_queue {
callback_update_slots = std::move(callback); callback_update_slots = std::move(callback);
} }
// Call when the state of one slot is changed // Call when the state of one slot is changed; it will move one task from the deferred queue to the main queue
void notify_slot_changed() { void pop_deferred_task() {
// move deferred tasks back to main loop // move deferred tasks back to main loop
std::unique_lock<std::mutex> lock(mutex_tasks); std::unique_lock<std::mutex> lock(mutex_tasks);
for (auto & task : queue_tasks_deferred) { if (!queue_tasks_deferred.empty()) {
server_task task = queue_tasks_deferred.front();
queue_tasks_deferred.erase(queue_tasks_deferred.begin());
queue_tasks.push_back(std::move(task)); queue_tasks.push_back(std::move(task));
} }
queue_tasks_deferred.clear();
} }
// end the start_loop routine // end the start_loop routine
@ -722,7 +723,7 @@ struct server_context {
slot.sparams = params.sparams; slot.sparams = params.sparams;
slot.callback_on_release = [this](int) { slot.callback_on_release = [this](int) {
queue_tasks.notify_slot_changed(); queue_tasks.pop_deferred_task();
}; };
slot.reset(); slot.reset();
@ -2412,6 +2413,7 @@ struct server_context {
} }
if (!process_token(result, slot)) { if (!process_token(result, slot)) {
// release slot because of stop condition
slot.release(); slot.release();
slot.print_timings(); slot.print_timings();
send_final_response(slot); send_final_response(slot);