From 58ad3c3ad209a03a67da95d4a12d5ba7fa2ea3cd Mon Sep 17 00:00:00 2001
From: Behnam M <58621210+ibehnam@users.noreply.github.com>
Date: Wed, 10 Jan 2024 00:20:33 -0500
Subject: [PATCH] starting http server before initializing the model

---
 examples/server/server.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index d310ffc3d..29c571cd8 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2798,7 +2798,9 @@ int main(int argc, char **argv)
         {"total_threads", std::thread::hardware_concurrency()},
         {"system_info", llama_print_system_info()},
     });
-
+
+    httplib::Server svr;
+    ServerState server_state = LOADING_MODEL;
 
     // load the model
     if (!llama.load_model(params))
@@ -2810,7 +2812,6 @@
     llama.initialize();
 
     server_state = READY;
-    httplib::Server svr;
 
     // Middleware for API key validation
    auto validate_api_key = [&sparams](const httplib::Request &req, httplib::Response &res) -> bool
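
For context, the effect of this ordering is that the httplib::Server object and the
ServerState flag exist while the model is still loading, so a status endpoint can
answer requests during startup instead of the server appearing dead. Below is a
minimal, self-contained sketch of that pattern, assuming cpp-httplib (httplib.h);
the /health route, the port, and the sleep standing in for llama.load_model() and
llama.initialize() are illustrative assumptions, not code from this patch:

    #include <atomic>
    #include <chrono>
    #include <thread>
    #include "httplib.h"

    enum ServerState { LOADING_MODEL, READY };

    int main() {
        // Create the server and the state flag *before* loading the model,
        // mirroring the ordering this patch introduces.
        httplib::Server svr;
        std::atomic<ServerState> server_state{LOADING_MODEL};

        // A health-style endpoint can report the loading state to clients.
        svr.Get("/health", [&](const httplib::Request &, httplib::Response &res) {
            if (server_state.load() == READY) {
                res.set_content(R"({"status": "ok"})", "application/json");
            } else {
                res.status = 503; // unavailable while the model loads
                res.set_content(R"({"status": "loading model"})", "application/json");
            }
        });

        // Serve requests on a second thread so /health answers during loading.
        std::thread server_thread([&svr] { svr.listen("127.0.0.1", 8080); });

        // Stand-in for llama.load_model(params) + llama.initialize().
        std::this_thread::sleep_for(std::chrono::seconds(5));
        server_state.store(READY);

        server_thread.join(); // listen() blocks, so the demo keeps serving
        return 0;
    }

Because the listen loop runs on a second thread in this sketch, the flag is a
std::atomic<ServerState>; the patch itself keeps a plain ServerState, since at
this point in the series everything above still runs on a single thread.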