Better handling of server state
While the model is being loaded, the server state is `LOADING_MODEL`. If model loading fails, the server state becomes `ERROR`; otherwise it becomes `READY`. The `/health` endpoint now returns more granular messages based on the `server_state` value.
This commit is contained in:
parent
0a2dc7559f
commit
03d7ff0777
1 changed file with 29 additions and 6 deletions
|
@ -146,6 +146,15 @@ static std::vector<uint8_t> base64_decode(const std::string & encoded_string)
|
||||||
// parallel
|
// parallel
|
||||||
//
|
//
|
||||||
|
|
||||||
|
|
||||||
|
// Lifecycle states of the server; /health reports one of these to clients.
// Kept as a plain enum (not enum class) because the rest of the file refers
// to the unscoped names LOADING_MODEL / READY / ERROR directly.
enum ServerState {
    LOADING_MODEL, // startup in progress: the model has not finished loading
    READY,         // model loaded successfully; server can accept requests
    ERROR          // load_model() failed; server is unusable
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
enum task_type {
|
enum task_type {
|
||||||
COMPLETION_TASK,
|
COMPLETION_TASK,
|
||||||
CANCEL_TASK
|
CANCEL_TASK
|
||||||
|
@ -2790,13 +2799,16 @@ int main(int argc, char **argv)
|
||||||
{"system_info", llama_print_system_info()},
|
{"system_info", llama_print_system_info()},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
server_state = LOADING_MODEL;
|
||||||
// load the model
|
// load the model
|
||||||
if (!llama.load_model(params))
|
if (!llama.load_model(params))
|
||||||
{
|
{
|
||||||
|
server_state = ERRPR;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
llama.initialize();
|
llama.initialize();
|
||||||
|
server_state = READY;
|
||||||
|
|
||||||
httplib::Server svr;
|
httplib::Server svr;
|
||||||
|
|
||||||
|
@ -2939,12 +2951,23 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
|
|
||||||
// Health probe: clients commonly poll /health before sending real requests.
// Maps the current server_state to an HTTP status + JSON body.
svr.Get("/health", [&](const httplib::Request&, httplib::Response& res) {
    switch (server_state) {
        case READY:
            res.set_content(R"({"status": "ok"})", "application/json");
            res.status = 200; // HTTP OK
            break;
        // FIX: was `case LOADING:` — no such enumerator exists; the
        // ServerState enum declares LOADING_MODEL, so this did not compile.
        case LOADING_MODEL:
            res.set_content(R"({"status": "loading model"})", "application/json");
            res.status = 503; // HTTP Service Unavailable
            break;
        case ERROR:
            res.set_content(R"({"status": "error", "error": "Model failed to load"})", "application/json");
            res.status = 500; // HTTP Internal Server Error
            break;
    }
});
|
||||||
|
|
||||||
|
|
||||||
svr.Get("/v1/models", [¶ms](const httplib::Request&, httplib::Response& res)
|
svr.Get("/v1/models", [¶ms](const httplib::Request&, httplib::Response& res)
|
||||||
{
|
{
|
||||||
std::time_t t = std::time(0);
|
std::time_t t = std::time(0);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue