server : fix initialization thread issues
This commit is contained in:
parent
2a37bd6b86
commit
f1d4138c13
2 changed files with 14 additions and 14 deletions
|
@ -2719,19 +2719,6 @@ int main(int argc, char **argv)
|
|||
log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
|
||||
}
|
||||
|
||||
LOG_INFO("HTTP server listening", log_data);
|
||||
// run the HTTP server in a thread - see comment below
|
||||
std::thread t([&]()
|
||||
{
|
||||
if (!svr.listen_after_bind())
|
||||
{
|
||||
state.store(SERVER_STATE_ERROR);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
});
|
||||
|
||||
// load the model
|
||||
if (!llama.load_model(params))
|
||||
{
|
||||
|
@ -3194,6 +3181,19 @@ int main(int argc, char **argv)
|
|||
}*/
|
||||
//);
|
||||
|
||||
LOG_INFO("HTTP server listening", log_data);
|
||||
// run the HTTP server in a thread - see comment below
|
||||
std::thread t([&]()
|
||||
{
|
||||
if (!svr.listen_after_bind())
|
||||
{
|
||||
state.store(SERVER_STATE_ERROR);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
});
|
||||
|
||||
llama.queue_tasks.on_new_task(std::bind(
|
||||
&llama_server_context::process_single_task, &llama, std::placeholders::_1));
|
||||
llama.queue_tasks.on_finish_multitask(std::bind(
|
||||
|
|
|
@ -84,7 +84,7 @@ Feature: llama.cpp server
|
|||
Then all prompts are predicted
|
||||
|
||||
# FIXME: #3969 infinite loop on the CI, not locally, if n_prompt * n_predict > kv_size
|
||||
@bug
|
||||
@llama.cpp
|
||||
Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size
|
||||
Given a prompt:
|
||||
"""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue