use deque

Xuan Son Nguyen 2024-09-02 14:58:46 +02:00
parent 27f2c14aa9
commit 86caa35343
2 changed files with 10 additions and 6 deletions


@@ -38,6 +38,7 @@
 #include <memory>
 #include <unordered_set>
 #include <unordered_map>
+#include <deque>

 using json = nlohmann::ordered_json;
@@ -383,8 +384,8 @@ struct server_queue {
     bool running;

     // queues
-    std::vector<server_task> queue_tasks;
-    std::vector<server_task> queue_tasks_deferred;
+    std::deque<server_task> queue_tasks;
+    std::deque<server_task> queue_tasks_deferred;

     std::mutex mutex_tasks;
     std::condition_variable condition_tasks;
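The switch to std::deque matters because tasks are sometimes re-queued at the front (see the push_front hunks below): std::deque::push_front is constant time, whereas inserting at the front of a std::vector shifts every existing element. A minimal standalone sketch of the difference; the task struct here is an illustrative stand-in, not the server's server_task:

    #include <deque>
    #include <string>
    #include <vector>

    // Illustrative stand-in for the server's task type.
    struct task { int id; std::string payload; };

    int main() {
        // std::vector: inserting at the front is O(n) -- every element
        // after the insertion point has to be shifted.
        std::vector<task> v;
        v.push_back(task{2, "normal"});
        v.insert(v.begin(), task{1, "urgent"});

        // std::deque: push_front and push_back are both O(1), so
        // "defer to the back" and "re-queue at the front" are cheap.
        std::deque<task> d;
        d.push_back(task{2, "normal"});
        d.push_front(task{1, "urgent"});
        return 0;
    }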
@@ -401,7 +402,7 @@ struct server_queue {
             LOG_VERBOSE("new task id", {{"new_id", task.id}});
         }
         if (front) {
-            queue_tasks.insert(queue_tasks.begin(), std::move(task));
+            queue_tasks.push_front(std::move(task));
         } else {
             queue_tasks.push_back(std::move(task));
         }
@@ -417,7 +418,7 @@ struct server_queue {
             LOG_VERBOSE("new task id", {{"new_id", task.id}});
         }
         if (front) {
-            queue_tasks.insert(queue_tasks.begin(), std::move(task));
+            queue_tasks.push_front(std::move(task));
         } else {
             queue_tasks.push_back(std::move(task));
         }
@@ -574,14 +575,14 @@ struct server_response {
     }

     // Send a new result to a waiting id_task
-    void send(server_task_result result) {
+    void send(server_task_result & result) {
         LOG_VERBOSE("send new result", {{"id_task", result.id}});

         std::unique_lock<std::mutex> lock(mutex_results);
         for (const auto & id_task : waiting_task_ids) {
             if (result.id == id_task) {
                 LOG_VERBOSE("queue_results.push_back", {{"id_task", id_task}});
-                queue_results.push_back(result);
+                queue_results.push_back(std::move(result));
                 condition_results.notify_all();
                 return;
             }
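This last hunk changes send() to take its argument by reference and move it into the result queue instead of copying it. A rough standalone illustration of the difference; the result struct and queue below are simplified stand-ins, not the server's actual types:

    #include <string>
    #include <utility>
    #include <vector>

    // Simplified stand-in for server_task_result.
    struct result { int id; std::string data; };

    std::vector<result> queue_results;

    // By-value parameter: the caller's object is copied (or moved, if the
    // caller passes an rvalue), and push_back(r) makes a second copy.
    void send_by_value(result r) {
        queue_results.push_back(r);
    }

    // By-reference parameter plus std::move: the data buffer is handed to
    // the queue without copying; the caller's object is left in a valid
    // but unspecified (moved-from) state afterwards.
    void send_by_ref(result & r) {
        queue_results.push_back(std::move(r));
    }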


@@ -8,9 +8,12 @@ Feature: Wrong usage of llama.cpp server
   Scenario: Infinite loop
     Given a server listening on localhost:8080
     And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
+    And 42 as server seed
+    And 2048 KV cache size
     # Uncomment below to fix the issue
     #And 64 server max tokens to predict
     Then the server is starting
+    Then the server is healthy
     Given a prompt:
       """
       Go to: infinite loop