From 03d7ff07775b361982649239388d57f945a60ccc Mon Sep 17 00:00:00 2001
From: Behnam M <58621210+ibehnam@users.noreply.github.com>
Date: Wed, 10 Jan 2024 00:14:36 -0500
Subject: [PATCH] Better handling of server state

While the model is being loaded, the server state is `LOADING_MODEL`.
If model loading fails, the server state becomes `ERROR`; otherwise it
becomes `READY`.

The `/health` endpoint now answers according to the `server_state`
value:

  READY          200  {"status": "ok"}
  LOADING_MODEL  503  {"status": "loading model"}
  ERROR          500  {"status": "error", "error": "Model failed to load"}
---
 examples/server/server.cpp | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 67d0cfbca..82856d3f0 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -146,6 +146,15 @@ static std::vector<uint8_t> base64_decode(const std::string & encoded_string)
 // parallel
 //
 
+
+enum ServerState {
+    LOADING_MODEL,  // Server is starting up, model not fully loaded yet
+    READY,          // Server is ready and model is loaded
+    ERROR           // An error occurred, load_model failed
+};
+
+static ServerState server_state = LOADING_MODEL; // updated in main(), reported by the /health endpoint
+
 enum task_type {
     COMPLETION_TASK,
     CANCEL_TASK
@@ -2789,14 +2798,17 @@ int main(int argc, char **argv)
                                 {"total_threads", std::thread::hardware_concurrency()},
                                 {"system_info", llama_print_system_info()},
                             });
-
+
+    server_state = LOADING_MODEL;
     // load the model
     if (!llama.load_model(params))
     {
+        server_state = ERROR;
         return 1;
     }
 
     llama.initialize();
+    server_state = READY;
 
     httplib::Server svr;
 
@@ -2939,12 +2951,23 @@ int main(int argc, char **argv)
 
 
     svr.Get("/health", [&](const httplib::Request&, httplib::Response& res) {
-        // in real-world applications, it's common to first query the /health endpoint of the server to make sure it's running
-        // it will return "ok" only after the model is successfully loaded by the server.
-        res.set_content(R"({"status": "ok"})", "application/json");
-        res.status = 200; // HTTP OK
+        switch(server_state) {
+            case READY:
+                res.set_content(R"({"status": "ok"})", "application/json");
+                res.status = 200; // HTTP OK
+                break;
+            case LOADING_MODEL:
+                res.set_content(R"({"status": "loading model"})", "application/json");
+                res.status = 503; // HTTP Service Unavailable
+                break;
+            case ERROR:
+                res.set_content(R"({"status": "error", "error": "Model failed to load"})", "application/json");
+                res.status = 500; // HTTP Internal Server Error
+                break;
+        }
     });
-
+
+
     svr.Get("/v1/models", [&params](const httplib::Request&, httplib::Response& res) {
         std::time_t t = std::time(0);
 