From d73456ac5962d8e76b1769c7eef84063467b388a Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Wed, 21 Feb 2024 14:19:18 +0100 Subject: [PATCH] server: health: * include_slots only if slots_endpoint * fix compile warning task.target_id not initialized. --- examples/server/README.md | 2 +- examples/server/server.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/server/README.md b/examples/server/README.md index 5aec6c39e..6d9f96cd4 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -140,7 +140,7 @@ node index.js - 200 -> `{"status": "no slot available", "slots_idle": 0, "slots_processing": 32}` if no slot are currently available. - 503 -> `{"status": "no slot available", "slots_idle": 0, "slots_processing": 32}` if the query parameter `fail_on_no_slot` is provided and no slot are currently available. - If the query parameter `include_slots` is passed, `slots` field will contain internal slots data. + If the query parameter `include_slots` is passed, the `slots` field will contain internal slots data, unless `--slots-endpoint-disable` is set. - **POST** `/completion`: Given a `prompt`, it returns the predicted completion. 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 93de3de1b..1c4479512 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2601,6 +2601,7 @@ int main(int argc, char **argv) task_server task; task.id = llama.queue_tasks.get_new_id(); task.type = TASK_TYPE_SLOTS_DATA; + task.target_id = -1; llama.queue_results.add_waiting_task_id(task.id); llama.queue_tasks.post(task); @@ -2617,7 +2618,7 @@ int main(int argc, char **argv) {"slots_idle", n_idle_slots}, {"slots_processing", n_processing_slots}}; res.status = 200; // HTTP OK - if (req.has_param("include_slots")) { + if (sparams.slots_endpoint && req.has_param("include_slots")) { health["slots"] = result.result_json["slots"]; } @@ -2647,6 +2648,7 @@ int main(int argc, char **argv) task_server task; task.id = llama.queue_tasks.get_new_id(); task.type = TASK_TYPE_SLOTS_DATA; + task.target_id = -1; llama.queue_results.add_waiting_task_id(task.id); llama.queue_tasks.post(task);