server : fix initialization thread issues
parent 2a37bd6b86
commit f1d4138c13

2 changed files with 14 additions and 14 deletions
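The change moves the startup of the HTTP listener thread in main() from before the model load to after it: previously the std::thread running svr.listen_after_bind() was created before llama.load_model(params), so the server could begin accepting requests while initialization was still in progress. The block is now placed after the model is loaded, immediately before the task queue callbacks are registered.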
@@ -2719,19 +2719,6 @@ int main(int argc, char **argv)
         log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
     }
 
-    LOG_INFO("HTTP server listening", log_data);
-    // run the HTTP server in a thread - see comment below
-    std::thread t([&]()
-            {
-                if (!svr.listen_after_bind())
-                {
-                    state.store(SERVER_STATE_ERROR);
-                    return 1;
-                }
-
-                return 0;
-            });
-
     // load the model
     if (!llama.load_model(params))
     {
@@ -3194,6 +3181,19 @@ int main(int argc, char **argv)
     }*/
     //);
 
+    LOG_INFO("HTTP server listening", log_data);
+    // run the HTTP server in a thread - see comment below
+    std::thread t([&]()
+            {
+                if (!svr.listen_after_bind())
+                {
+                    state.store(SERVER_STATE_ERROR);
+                    return 1;
+                }
+
+                return 0;
+            });
+
     llama.queue_tasks.on_new_task(std::bind(
         &llama_server_context::process_single_task, &llama, std::placeholders::_1));
     llama.queue_tasks.on_finish_multitask(std::bind(
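For reference, below is a minimal, self-contained sketch (not part of the commit) of the bind-first / listen-later pattern the moved block relies on, assuming cpp-httplib's httplib::Server as used by the server example; the /health handler, host, and port are illustrative placeholders.

// Minimal sketch (illustrative, not from the commit): reserve the port early,
// finish slow initialization, then start serving on a worker thread.
#include <atomic>
#include <thread>

#include "httplib.h" // cpp-httplib, the single-header HTTP library used by the example

enum server_state { SERVER_STATE_LOADING_MODEL, SERVER_STATE_READY, SERVER_STATE_ERROR };

int main() {
    std::atomic<int> state{SERVER_STATE_LOADING_MODEL};
    httplib::Server  svr;

    // Placeholder endpoint so the sketch is runnable end-to-end.
    svr.Get("/health", [&](const httplib::Request &, httplib::Response & res) {
        res.set_content(state.load() == SERVER_STATE_READY ? "ok" : "loading", "text/plain");
    });

    // bind_to_port() claims the socket without starting the accept loop yet.
    if (!svr.bind_to_port("127.0.0.1", 8080)) {
        return 1;
    }

    // ... slow initialization goes here (in the server: llama.load_model(params)) ...
    state.store(SERVER_STATE_READY);

    // Only now start accepting connections, on a dedicated thread,
    // mirroring the moved block above.
    std::thread t([&]() {
        if (!svr.listen_after_bind()) {
            state.store(SERVER_STATE_ERROR);
        }
    });

    t.join(); // listen_after_bind() blocks until svr.stop() is called
    return 0;
}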
@@ -84,7 +84,7 @@ Feature: llama.cpp server
     Then all prompts are predicted
 
   # FIXME: #3969 infinite loop on the CI, not locally, if n_prompt * n_predict > kv_size
-  @bug
+  @llama.cpp
  Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size
     Given a prompt:
       """
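In the feature file, the scenario exercising KV cache overflow is retagged from @bug to @llama.cpp. Assuming the test suite's usual behave conventions, where the default run picks up @llama.cpp scenarios and @bug marks known-broken ones that are skipped, this re-enables the scenario now that the thread initialization issue is fixed; the #3969 FIXME comment stays in place.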