server : fix initialization thread issues
parent 2a37bd6b86
commit f1d4138c13

2 changed files with 14 additions and 14 deletions
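The change moves the startup of the HTTP listener thread in main() from before the model load to after it: previously the std::thread running svr.listen_after_bind() was created before llama.load_model(params), so the server could begin accepting requests while initialization was still in progress. The block is now placed after the model is loaded, immediately before the task queue callbacks are registered.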
@@ -2719,19 +2719,6 @@ int main(int argc, char **argv)
         log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
     }
 
-    LOG_INFO("HTTP server listening", log_data);
-    // run the HTTP server in a thread - see comment below
-    std::thread t([&]()
-            {
-                if (!svr.listen_after_bind())
-                {
-                    state.store(SERVER_STATE_ERROR);
-                    return 1;
-                }
-
-                return 0;
-            });
-
     // load the model
     if (!llama.load_model(params))
     {
@@ -3194,6 +3181,19 @@ int main(int argc, char **argv)
     }*/
     //);
 
+    LOG_INFO("HTTP server listening", log_data);
+    // run the HTTP server in a thread - see comment below
+    std::thread t([&]()
+            {
+                if (!svr.listen_after_bind())
+                {
+                    state.store(SERVER_STATE_ERROR);
+                    return 1;
+                }
+
+                return 0;
+            });
+
     llama.queue_tasks.on_new_task(std::bind(
         &llama_server_context::process_single_task, &llama, std::placeholders::_1));
     llama.queue_tasks.on_finish_multitask(std::bind(
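For reference, below is a minimal, self-contained sketch (not part of the commit) of the bind-first / listen-later pattern the moved block relies on, assuming cpp-httplib's httplib::Server as used by the server example; the /health handler, host, and port are illustrative placeholders.

// Minimal sketch (illustrative, not from the commit): reserve the port early,
// finish slow initialization, then start serving on a worker thread.
#include <atomic>
#include <thread>

#include "httplib.h" // cpp-httplib, the single-header HTTP library used by the example

enum server_state { SERVER_STATE_LOADING_MODEL, SERVER_STATE_READY, SERVER_STATE_ERROR };

int main() {
    std::atomic<int> state{SERVER_STATE_LOADING_MODEL};
    httplib::Server  svr;

    // Placeholder endpoint so the sketch is runnable end-to-end.
    svr.Get("/health", [&](const httplib::Request &, httplib::Response & res) {
        res.set_content(state.load() == SERVER_STATE_READY ? "ok" : "loading", "text/plain");
    });

    // bind_to_port() claims the socket without starting the accept loop yet.
    if (!svr.bind_to_port("127.0.0.1", 8080)) {
        return 1;
    }

    // ... slow initialization goes here (in the server: llama.load_model(params)) ...
    state.store(SERVER_STATE_READY);

    // Only now start accepting connections, on a dedicated thread,
    // mirroring the moved block above.
    std::thread t([&]() {
        if (!svr.listen_after_bind()) {
            state.store(SERVER_STATE_ERROR);
        }
    });

    t.join(); // listen_after_bind() blocks until svr.stop() is called
    return 0;
}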
@@ -84,7 +84,7 @@ Feature: llama.cpp server
     Then all prompts are predicted
 
   # FIXME: #3969 infinite loop on the CI, not locally, if n_prompt * n_predict > kv_size
-  @bug
+  @llama.cpp
  Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size
     Given a prompt:
       """
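In the feature file, the scenario exercising KV cache overflow is retagged from @bug to @llama.cpp. Assuming the test suite's usual behave conventions, where the default run picks up @llama.cpp scenarios and @bug marks known-broken ones that are skipped, this re-enables the scenario now that the thread initialization issue is fixed; the #3969 FIXME comment stays in place.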