diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 5a952f99b..de20374be 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2824,18 +2824,42 @@ int main(int argc, char **argv) } }); + LOG_INFO("HTTP server listening", log_data); + // run the HTTP server in a thread - see comment below + std::thread t([&]() + { + if (!svr.listen_after_bind()) + { + server_state = ERROR; + return 1; + } + + return 0; + }); + + // GG: if I put the main loop inside a thread, it crashes on the first request when build in Debug!? + // "Bus error: 10" - this is on macOS, it does not crash on Linux + //std::thread t2([&]() + { + bool running = true; + while (running) + { + running = llama.update_slots(); + } + } + //); + // load the model if (!llama.load_model(params)) { server_state = ERROR; return 1; + } else { + llama.initialize(); + server_state = READY; } - llama.initialize(); - server_state = READY; - - // Middleware for API key validation auto validate_api_key = [&sparams](const httplib::Request &req, httplib::Response &res) -> bool { // If API key is not set, skip validation @@ -3252,30 +3276,6 @@ int main(int argc, char **argv) log_data["api_key"] = "api_key: ****" + sparams.api_key.substr(sparams.api_key.length() - 4); } - LOG_INFO("HTTP server listening", log_data); - // run the HTTP server in a thread - see comment below - std::thread t([&]() - { - if (!svr.listen_after_bind()) - { - return 1; - } - - return 0; - }); - - // GG: if I put the main loop inside a thread, it crashes on the first request when build in Debug!? - // "Bus error: 10" - this is on macOS, it does not crash on Linux - //std::thread t2([&]() - { - bool running = true; - while (running) - { - running = llama.update_slots(); - } - } - //); - t.join(); llama_backend_free();