From 03d7ff07775b361982649239388d57f945a60ccc Mon Sep 17 00:00:00 2001
From: Behnam M <58621210+ibehnam@users.noreply.github.com>
Date: Wed, 10 Jan 2024 00:14:36 -0500
Subject: [PATCH] Better handling of server state

When the model is being loaded, the server state is `LOADING_MODEL`. If model-loading fails, the server state becomes `ERROR`, otherwise it becomes `READY`. The `/health` endpoint provides more granular messages now according to the server_state value.
---
 examples/server/server.cpp | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 67d0cfbca..82856d3f0 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -146,6 +146,15 @@ static std::vector<uint8_t> base64_decode(const std::string & encoded_string)
 // parallel
 //
 
+
+// Model-loading lifecycle of the server, reported by the /health endpoint.
+enum ServerState {
+    LOADING_MODEL,  // Server is starting up, model not fully loaded yet
+    READY,          // Server is ready and model is loaded
+    ERROR           // An error occurred, load_model failed
+};
+static ServerState server_state = LOADING_MODEL; // written in main(), read by the /health handler
+
 enum task_type {
     COMPLETION_TASK,
     CANCEL_TASK
@@ -2789,14 +2798,17 @@ int main(int argc, char **argv)
                                 {"total_threads", std::thread::hardware_concurrency()},
                                 {"system_info", llama_print_system_info()},
                             });
-
+    
+    server_state = LOADING_MODEL;
     // load the model
     if (!llama.load_model(params))
     {
+        server_state = ERROR; // record the load failure before exiting
         return 1;
     }
 
     llama.initialize();
+    server_state = READY;
 
     httplib::Server svr;
 
@@ -2939,12 +2951,23 @@ int main(int argc, char **argv)
 
 
     svr.Get("/health", [&](const httplib::Request&, httplib::Response& res) {
-        // in real-world applications, it's common to first query the /health endpoint of the server to make sure it's running
-        // it will return "ok" only after the model is successfully loaded by the server.
-        res.set_content(R"({"status": "ok"})", "application/json");
-        res.status = 200; // HTTP OK
+        switch(server_state) { // map the model-loading state to a health response
+            case READY:
+                res.set_content(R"({"status": "ok"})", "application/json");
+                res.status = 200; // HTTP OK
+                break;
+            case LOADING_MODEL:
+                res.set_content(R"({"status": "loading model"})", "application/json");
+                res.status = 503; // HTTP Service Unavailable
+                break;
+            case ERROR:
+                res.set_content(R"({"status": "error", "error": "Model failed to load"})", "application/json");
+                res.status = 500; // HTTP Internal Server Error
+                break;
+        }
     });
-    
+
+
     svr.Get("/v1/models", [&params](const httplib::Request&, httplib::Response& res)
             {
                 std::time_t t = std::time(0);