add "model_path" to /props

2024-12-07 20:05:04 +01:00 · 2024-12-07 20:05:04 +01:00 · 1949f68f4e
commit 1949f68f4e
parent 65d2e6d675
2 changed files with 5 additions and 1 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -801,7 +801,8 @@ struct server_task_result_metrics : server_task_result {
    uint64_t n_decode_total     = 0;
    uint64_t n_busy_slots_total = 0;

-    // TODO: get rid of this json object and use to_json() instead
+    // while we could also use std::vector<server_slot>, that would require copying the slot object, which can be quite messy;
+    // therefore, we use json to temporarily store the slot.to_json() result
    json slots_data = json::array();

    virtual json to_json() override {
@@ -3326,9 +3327,11 @@ int main(int argc, char ** argv) {
    };

    const auto handle_props = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
+        // this endpoint is publicly available; please only return what is safe to expose
        json data = {
            { "default_generation_settings", ctx_server.default_generation_settings_for_props },
            { "total_slots",                 ctx_server.params_base.n_parallel },
+            { "model_path",                  ctx_server.params_base.model },
            { "chat_template",               llama_get_chat_template(ctx_server.model) },
        };

--- a/examples/server/tests/unit/test_basic.py
+++ b/examples/server/tests/unit/test_basic.py
@@ -22,6 +22,7 @@ def test_server_props():
    server.start()
    res = server.make_request("GET", "/props")
    assert res.status_code == 200
+    assert ".gguf" in res.body["model_path"]
    assert res.body["total_slots"] == server.n_slots
    default_val = res.body["default_generation_settings"]
    assert server.n_ctx is not None and server.n_slots is not None