Better handling of server state
While the model is being loaded, the server state is `LOADING_MODEL`. If model loading fails, the server state becomes `ERROR`; otherwise it becomes `READY`. The `/health` endpoint now returns more granular messages based on the `server_state` value.
This commit is contained in:
parent
0a2dc7559f
commit
03d7ff0777
1 changed file with 29 additions and 6 deletions
|
@ -146,6 +146,15 @@ static std::vector<uint8_t> base64_decode(const std::string & encoded_string)
|
||||||
// parallel
|
// parallel
|
||||||
//
|
//
|
||||||
|
|
||||||
|
|
||||||
|
// Lifecycle states of the server; /health reports one of these to clients.
// Kept as a plain enum (not enum class) because the rest of the file refers
// to the unscoped names LOADING_MODEL / READY / ERROR directly.
enum ServerState {
    LOADING_MODEL, // startup in progress: the model has not finished loading
    READY,         // model loaded successfully; server can accept requests
    ERROR          // load_model() failed; server is unusable
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
enum task_type {
|
enum task_type {
|
||||||
COMPLETION_TASK,
|
COMPLETION_TASK,
|
||||||
CANCEL_TASK
|
CANCEL_TASK
|
||||||
|
@ -2790,13 +2799,16 @@ int main(int argc, char **argv)
|
||||||
{"system_info", llama_print_system_info()},
|
{"system_info", llama_print_system_info()},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
server_state = LOADING_MODEL;
|
||||||
// load the model
|
// load the model
|
||||||
if (!llama.load_model(params))
|
if (!llama.load_model(params))
|
||||||
{
|
{
|
||||||
|
server_state = ERRPR;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
llama.initialize();
|
llama.initialize();
|
||||||
|
server_state = READY;
|
||||||
|
|
||||||
httplib::Server svr;
|
httplib::Server svr;
|
||||||
|
|
||||||
|
@ -2939,12 +2951,23 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
|
|
||||||
// Health probe: clients commonly poll /health before sending real requests.
// Maps the current server_state to an HTTP status + JSON body.
svr.Get("/health", [&](const httplib::Request&, httplib::Response& res) {
    switch (server_state) {
        case READY:
            res.set_content(R"({"status": "ok"})", "application/json");
            res.status = 200; // HTTP OK
            break;
        // FIX: was `case LOADING:` — no such enumerator exists; the
        // ServerState enum declares LOADING_MODEL, so this did not compile.
        case LOADING_MODEL:
            res.set_content(R"({"status": "loading model"})", "application/json");
            res.status = 503; // HTTP Service Unavailable
            break;
        case ERROR:
            res.set_content(R"({"status": "error", "error": "Model failed to load"})", "application/json");
            res.status = 500; // HTTP Internal Server Error
            break;
    }
});
|
||||||
|
|
||||||
|
|
||||||
svr.Get("/v1/models", [¶ms](const httplib::Request&, httplib::Response& res)
|
svr.Get("/v1/models", [¶ms](const httplib::Request&, httplib::Response& res)
|
||||||
{
|
{
|
||||||
std::time_t t = std::time(0);
|
std::time_t t = std::time(0);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue