From e2ee37761ef89e84a30d2950844d8fdfbe1e6b15 Mon Sep 17 00:00:00 2001
From: ziadb
Date: Mon, 27 Nov 2023 18:12:58 -0500
Subject: [PATCH] * remove atomicity of id_gen, and change lock_guard to
 unique_lock on completion requests

---
 examples/server/server.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 2b1ba4eca..443b3c00a 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -524,7 +524,7 @@ struct llama_server_context
     bool all_slots_are_idle = false;
     bool add_bos_token = true;
 
-    std::atomic<int32_t> id_gen;
+    int32_t id_gen;
     int32_t n_ctx; // total context for all clients / slots
 
     // system prompt
@@ -542,7 +542,7 @@
     std::vector<task_server> queue_tasks;
     std::vector<task_result> queue_results;
     std::vector<task_multi> queue_multitasks;
-    std::mutex mutex_tasks;
+    std::mutex mutex_tasks; // also guards id_gen
    std::mutex mutex_results;
    std::mutex mutex_multitasks;
 
@@ -1333,6 +1333,7 @@
 
     int request_completion(json data, bool infill, bool embedding, int multitask_id)
     {
+        std::unique_lock<std::mutex> lock(mutex_tasks);
         task_server task;
         task.id = id_gen++;
         task.target_id = 0;
@@ -1345,12 +1346,12 @@
         // when a completion task's prompt array is not a singleton, we split it into multiple requests
         if (task.data.at("prompt").size() > 1)
         {
+            lock.unlock(); // entering new func scope
             auto id = split_multiprompt_task(task);
             return id;
         }
 
         // otherwise, it's a single-prompt task, we actually queue it
-        std::lock_guard<std::mutex> lock(mutex_tasks);
         queue_tasks.push_back(task);
         return task.id;
     }
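
Note on the locking pattern: the hunks above replace the atomic id counter with a plain int32_t that is only ever read or incremented while mutex_tasks is held, and switch request_completion() to std::unique_lock so the lock can be released before split_multiprompt_task() runs (that helper presumably re-acquires mutex_tasks, and the mutex is not recursive). The standalone sketch below illustrates the same pattern; it is not llama.cpp code, and task_queue, enqueue_many and request are made-up stand-ins.

    // Minimal sketch of the pattern: plain counter guarded by a mutex, with an
    // early unlock before calling a helper that locks the same mutex again.
    #include <cstdint>
    #include <mutex>
    #include <vector>

    struct task_queue {
        int32_t id_gen = 0;          // plain counter, guarded by mutex_tasks
        std::vector<int32_t> queue;  // stand-in for queue_tasks
        std::mutex mutex_tasks;      // guards id_gen and queue

        // stand-in for split_multiprompt_task(): takes mutex_tasks itself
        int32_t enqueue_many(int n) {
            std::lock_guard<std::mutex> lock(mutex_tasks);
            const int32_t first = id_gen;
            for (int i = 0; i < n; i++) {
                queue.push_back(id_gen++);
            }
            return first;
        }

        // stand-in for request_completion()
        int32_t request(bool multi) {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            const int32_t id = id_gen++;  // safe: lock is held

            if (multi) {
                lock.unlock();            // must release before calling a function
                                          // that re-locks the non-recursive mutex
                return enqueue_many(3);
            }

            queue.push_back(id);          // single task: still under the original lock
            return id;
        }
    };

    int main() {
        task_queue q;
        q.request(false);  // single-prompt path
        q.request(true);   // multi-prompt path
        return 0;
    }

With every access to id_gen behind mutex_tasks, the atomic adds nothing, and the comment added on mutex_tasks records that invariant; std::lock_guard cannot be released early, which is why the patch moves to std::unique_lock.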