server: use std::deque for the task queues
This commit is contained in:
parent
27f2c14aa9
commit
86caa35343
2 changed files with 10 additions and 6 deletions
|
@ -38,6 +38,7 @@
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <deque>
|
||||||
|
|
||||||
using json = nlohmann::ordered_json;
|
using json = nlohmann::ordered_json;
|
||||||
|
|
||||||
|
@ -383,8 +384,8 @@ struct server_queue {
|
||||||
bool running;
|
bool running;
|
||||||
|
|
||||||
// queues
|
// queues
|
||||||
std::vector<server_task> queue_tasks;
|
std::deque<server_task> queue_tasks;
|
||||||
std::vector<server_task> queue_tasks_deferred;
|
std::deque<server_task> queue_tasks_deferred;
|
||||||
|
|
||||||
std::mutex mutex_tasks;
|
std::mutex mutex_tasks;
|
||||||
std::condition_variable condition_tasks;
|
std::condition_variable condition_tasks;
|
||||||
|
@ -401,7 +402,7 @@ struct server_queue {
|
||||||
LOG_VERBOSE("new task id", {{"new_id", task.id}});
|
LOG_VERBOSE("new task id", {{"new_id", task.id}});
|
||||||
}
|
}
|
||||||
if (front) {
|
if (front) {
|
||||||
queue_tasks.insert(queue_tasks.begin(), std::move(task));
|
queue_tasks.push_front(std::move(task));
|
||||||
} else {
|
} else {
|
||||||
queue_tasks.push_back(std::move(task));
|
queue_tasks.push_back(std::move(task));
|
||||||
}
|
}
|
||||||
|
@ -417,7 +418,7 @@ struct server_queue {
|
||||||
LOG_VERBOSE("new task id", {{"new_id", task.id}});
|
LOG_VERBOSE("new task id", {{"new_id", task.id}});
|
||||||
}
|
}
|
||||||
if (front) {
|
if (front) {
|
||||||
queue_tasks.insert(queue_tasks.begin(), std::move(task));
|
queue_tasks.push_front(std::move(task));
|
||||||
} else {
|
} else {
|
||||||
queue_tasks.push_back(std::move(task));
|
queue_tasks.push_back(std::move(task));
|
||||||
}
|
}
|
||||||
|
@ -574,14 +575,14 @@ struct server_response {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send a new result to a waiting id_task
|
// Send a new result to a waiting id_task
|
||||||
void send(server_task_result result) {
|
void send(server_task_result & result) {
|
||||||
LOG_VERBOSE("send new result", {{"id_task", result.id}});
|
LOG_VERBOSE("send new result", {{"id_task", result.id}});
|
||||||
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_results);
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
for (const auto & id_task : waiting_task_ids) {
|
for (const auto & id_task : waiting_task_ids) {
|
||||||
if (result.id == id_task) {
|
if (result.id == id_task) {
|
||||||
LOG_VERBOSE("queue_results.push_back", {{"id_task", id_task}});
|
LOG_VERBOSE("queue_results.push_back", {{"id_task", id_task}});
|
||||||
queue_results.push_back(result);
|
queue_results.push_back(std::move(result));
|
||||||
condition_results.notify_all();
|
condition_results.notify_all();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,9 +8,12 @@ Feature: Wrong usage of llama.cpp server
|
||||||
Scenario: Infinite loop
|
Scenario: Infinite loop
|
||||||
Given a server listening on localhost:8080
|
Given a server listening on localhost:8080
|
||||||
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
||||||
|
And 42 as server seed
|
||||||
|
And 2048 KV cache size
|
||||||
# Uncomment below to fix the issue
|
# Uncomment below to fix the issue
|
||||||
#And 64 server max tokens to predict
|
#And 64 server max tokens to predict
|
||||||
Then the server is starting
|
Then the server is starting
|
||||||
|
Then the server is healthy
|
||||||
Given a prompt:
|
Given a prompt:
|
||||||
"""
|
"""
|
||||||
Go to: infinite loop
|
Go to: infinite loop
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue