Minor updates
This commit is contained in:
parent
9c99ef43d7
commit
011ea9852a
3 changed files with 10 additions and 15 deletions
|
@ -94,7 +94,7 @@ if __name__ == "__main__":
|
|||
|
||||
url = "http://localhost:8080/completion"
|
||||
|
||||
num_requests = 40
|
||||
num_requests = 20
|
||||
q = Queue(maxsize = 64)
|
||||
threads = []
|
||||
|
||||
|
|
|
@ -1589,7 +1589,7 @@ struct llama_server_context
|
|||
slot.t_last_used = ggml_time_us();
|
||||
|
||||
LOG_TEE("slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
||||
queue_tasks.notify_slot_changed();
|
||||
queue_tasks.notify_slot_changed(); // why don't we immediately reallocate the released slot without waiting? Is this what -cb does?
|
||||
|
||||
continue;
|
||||
}
|
||||
|
@ -2254,22 +2254,12 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||
}
|
||||
else if (arg == "-skvg" || arg == "--show-graphics")
|
||||
{
|
||||
if (i >= argc)
|
||||
{
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
llama.skvgraphics = true;
|
||||
llama.skvgraphics = true; // -skvg takes no parameter so we don't test ++i >= argc
|
||||
llama.skvinteract = false;
|
||||
}
|
||||
else if (arg == "-skvi" || arg == "--show-interactive-graphics")
|
||||
{
|
||||
if (i >= argc)
|
||||
{
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
llama.skvgraphics = true;
|
||||
llama.skvgraphics = true; // -skvi takes no parameter so we don't test ++i >= argc
|
||||
llama.skvinteract = true;
|
||||
}
|
||||
else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers")
|
||||
|
|
|
@ -272,11 +272,14 @@ struct llama_server_queue {
|
|||
// Call when the state of one slot is changed
|
||||
void notify_slot_changed() {
|
||||
// move deferred tasks back to main loop
|
||||
// Does this mean that when ONE slot finishes we move ALL deferred tasks back to the main queue? Why?
|
||||
// It seems that we move everything back to the main queue, but we don't allocate a task to the slot that was just released.
|
||||
// lock so nothing gets added while we are clearing the deferred queue
|
||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
for (auto & task : queue_tasks_deferred) {
|
||||
queue_tasks.push_back(std::move(task));
|
||||
}
|
||||
queue_tasks_deferred.clear();
|
||||
queue_tasks_deferred.clear(); // and clear the deferred tasks completely?
|
||||
}
|
||||
|
||||
// end the start_loop routine
|
||||
|
@ -387,12 +390,14 @@ struct llama_server_response {
|
|||
void add_waiting_task_id(int task_id) {
|
||||
std::unique_lock<std::mutex> lock(mutex_results);
|
||||
waiting_task_ids.insert(task_id);
|
||||
printf("\033[21;0H");
|
||||
LOG_TEE("Waiting task list size after addition: %zu.\n", waiting_task_ids.size());
|
||||
}
|
||||
|
||||
void remove_waiting_task_id(int task_id) {
|
||||
std::unique_lock<std::mutex> lock(mutex_results);
|
||||
waiting_task_ids.erase(task_id);
|
||||
printf("\033[21;0H");
|
||||
LOG_TEE("Waiting task list size after removal: %zu.\n", waiting_task_ids.size());
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue