server: enrich health endpoint with available slots, return 503 if no slots are available

This commit is contained in:
Pierrick HYMBERT 2024-02-17 12:45:17 +01:00
parent 5bf2b94dd4
commit fb1c1d0fb1

View file

@ -2561,8 +2561,35 @@ int main(int argc, char **argv)
server_state current_state = state.load(); server_state current_state = state.load();
switch(current_state) { switch(current_state) {
case SERVER_STATE_READY: case SERVER_STATE_READY:
if (llama.all_slots_are_idle) {
res.set_content(R"({"status": "ok"})", "application/json"); res.set_content(R"({"status": "ok"})", "application/json");
res.status = 200; // HTTP OK res.status = 200; // HTTP OK
} else {
int available_slots = 0;
int processing_slots = 0;
for (llama_client_slot & slot : llama.slots) {
if (slot.available()) {
available_slots++;
} else {
processing_slots++;
}
}
if (available_slots > 0) {
json health = {
{"status", "ok"},
{"slots_idle", available_slots},
{"slots_processing", processing_slots}};
res.set_content(health.dump(), "application/json");
res.status = 200; // HTTP OK
} else {
json health = {
{"status", "no slot available"},
{"slots_idle", available_slots},
{"slots_processing", processing_slots}};
res.set_content(health.dump(), "application/json");
res.status = 503; // HTTP Service Unavailable
}
}
break; break;
case SERVER_STATE_LOADING_MODEL: case SERVER_STATE_LOADING_MODEL:
res.set_content(R"({"status": "loading model"})", "application/json"); res.set_content(R"({"status": "loading model"})", "application/json");