From 86caa35343b3554962fc9b3a35ea7ad21bd9f845 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 2 Sep 2024 14:58:46 +0200 Subject: [PATCH] use deque --- examples/server/server.cpp | 13 +++++++------ examples/server/tests/features/wrong_usages.feature | 3 +++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 024fd1962..109dbc023 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -38,6 +38,7 @@ #include #include #include +#include <deque> using json = nlohmann::ordered_json; @@ -383,8 +384,8 @@ struct server_queue { bool running; // queues - std::vector<server_task> queue_tasks; - std::vector<server_task> queue_tasks_deferred; + std::deque<server_task> queue_tasks; + std::deque<server_task> queue_tasks_deferred; std::mutex mutex_tasks; std::condition_variable condition_tasks; @@ -401,7 +402,7 @@ struct server_queue { LOG_VERBOSE("new task id", {{"new_id", task.id}}); } if (front) { - queue_tasks.insert(queue_tasks.begin(), std::move(task)); + queue_tasks.push_front(std::move(task)); } else { queue_tasks.push_back(std::move(task)); } @@ -417,7 +418,7 @@ struct server_queue { LOG_VERBOSE("new task id", {{"new_id", task.id}}); } if (front) { - queue_tasks.insert(queue_tasks.begin(), std::move(task)); + queue_tasks.push_front(std::move(task)); } else { queue_tasks.push_back(std::move(task)); } @@ -574,14 +575,14 @@ struct server_response { } // Send a new result to a waiting id_task - void send(server_task_result result) { + void send(server_task_result & result) { LOG_VERBOSE("send new result", {{"id_task", result.id}}); std::unique_lock<std::mutex> lock(mutex_results); for (const auto & id_task : waiting_task_ids) { if (result.id == id_task) { LOG_VERBOSE("queue_results.push_back", {{"id_task", id_task}}); - queue_results.push_back(result); + queue_results.push_back(std::move(result)); condition_results.notify_all(); return; } diff --git a/examples/server/tests/features/wrong_usages.feature 
b/examples/server/tests/features/wrong_usages.feature index cf14b3b44..61d5f315e 100644 --- a/examples/server/tests/features/wrong_usages.feature +++ b/examples/server/tests/features/wrong_usages.feature @@ -8,9 +8,12 @@ Feature: Wrong usage of llama.cpp server Scenario: Infinite loop Given a server listening on localhost:8080 And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models + And 42 as server seed + And 2048 KV cache size # Uncomment below to fix the issue #And 64 server max tokens to predict Then the server is starting + Then the server is healthy Given a prompt: """ Go to: infinite loop