server : various fixes (#10704)

* server : various fixes ggml-ci * server : show curent seed in slot_params ggml-ci * fix /slots endpoint * Update examples/server/server.cpp Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * server : reflect endpoint response changes in the readme ggml-ci --------- Co-authored-by: Xuan Son Nguyen <son@huggingface.co> Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
2024-12-07 18:02:05 +02:00 · 2024-12-07 18:02:05 +02:00 · ce4a7b8493
commit ce4a7b8493
parent 19d8762ab6
4 changed files with 178 additions and 97 deletions
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@ -327,12 +327,12 @@ static std::string llama_get_chat_template(const struct llama_model * model) {
    std::string template_key = "tokenizer.chat_template";
    // call with NULL buffer to get the total size of the string
    int32_t res = llama_model_meta_val_str(model, template_key.c_str(), NULL, 0);
-    if (res < 0) {
+    if (res < 2) {
        return "";
    } else {
        std::vector<char> model_template(res, 0);
        llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        return std::string(model_template.data(), model_template.size());
+        return std::string(model_template.data(), model_template.size() - 1);
    }
 }