server : simplify json parsing + add comment about t_last

commit fd74b5ea34 (parent 9c8d3c8a25)
2 changed files with 9 additions and 19 deletions
examples/server/server.cpp

@@ -764,6 +764,7 @@ struct llama_server_context {
     server_slot * get_slot(int id) {
         int64_t t_last = ggml_time_us();

         server_slot * last_used = nullptr;

         for (server_slot & slot : slots) {
@@ -771,6 +772,7 @@ struct llama_server_context {
                 return &slot;
             }

+            // among all available slots, find the one that has been least recently used
             if (slot.available() && slot.t_last_used < t_last) {
                 last_used = &slot;
                 t_last = slot.t_last_used;
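
The selection policy above prefers the requested slot id and otherwise falls back to the available slot that was used longest ago; because t_last starts at the current time, any idle slot's timestamp beats it. A minimal, self-contained sketch of that policy (server_slot here is a stand-in with only the fields the loop touches, and t_now replaces ggml_time_us()):

// Standalone sketch of the slot-selection policy (assumed stand-in types).
#include <cstdint>
#include <vector>

struct server_slot {
    int     id          = 0;
    bool    in_use      = false;
    int64_t t_last_used = 0;   // microsecond timestamp of last use

    bool available() const { return !in_use; }
};

server_slot * get_slot(std::vector<server_slot> & slots, int id, int64_t t_now) {
    int64_t t_last = t_now;              // any idle slot is older than "now"
    server_slot * last_used = nullptr;

    for (server_slot & slot : slots) {
        if (slot.id == id && slot.available()) {
            return &slot;                // exact id match wins immediately
        }
        // among all available slots, find the one least recently used
        if (slot.available() && slot.t_last_used < t_last) {
            last_used = &slot;
            t_last    = slot.t_last_used;
        }
    }
    return last_used;                    // may be nullptr if no slot is free
}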
@@ -832,23 +834,9 @@ struct llama_server_context {
         }

         // infill
-        if (data.count("input_prefix") != 0) {
-            slot.params.input_prefix = data["input_prefix"];
-        } else {
-            slot.params.input_prefix = "";
-        }
-
-        if (data.count("input_suffix") != 0) {
-            slot.params.input_suffix = data["input_suffix"];
-        } else {
-            slot.params.input_suffix = "";
-        }
-
-        if (data.count("prompt") != 0) {
-            slot.prompt = data["prompt"];
-        } else {
-            slot.prompt = "";
-        }
+        slot.params.input_prefix = json_value(data, "input_prefix", default_params.input_prefix);
+        slot.params.input_suffix = json_value(data, "input_suffix", default_params.input_suffix);
+        slot.prompt              = json_value(data, "prompt",       std::string(""));

         // penalize user-provided tokens
         {
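
The simplification leans on a json_value helper that returns the value at a key when present and a caller-supplied default otherwise. The helper itself is not part of this diff; a sketch of the shape such a helper can take, assuming nlohmann::json (the JSON library the server example uses):

#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

// json_value-style helper (sketch): return data[key] if the key exists and
// is non-null, otherwise the default. This is what lets the three if/else
// blocks above collapse into single assignments.
template <typename T>
static T json_value(const json & data, const std::string & key, const T & default_value) {
    return (data.contains(key) && !data.at(key).is_null())
        ? data.at(key).get<T>()
        : default_value;
}

Behavior for an absent key is unchanged: input_prefix and input_suffix fall back to the defaults in default_params, and prompt falls back to an empty string.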
llama.cpp

@@ -13533,12 +13533,14 @@ LLAMA_API int32_t llama_chat_apply_template(
             curr_tmpl = std::string(model_template.data(), model_template.size());
         }
     }

     // format the chat to string
     std::vector<const llama_chat_message *> chat_vec;
     chat_vec.resize(n_msg);
     for (size_t i = 0; i < n_msg; i++) {
         chat_vec[i] = &chat[i];
     }

     std::string formatted_chat;
     int32_t res = llama_chat_apply_template_internal(curr_tmpl, chat_vec, formatted_chat, add_ass);
     if (res < 0) {
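
For reference, a hedged usage sketch of the public entry point whose internals are shown above. The signature is assumed from llama.h of this period (model, optional template override, message array, add_ass flag, output buffer); the function reports the formatted length, so a caller can retry with a larger buffer if the first one was too small:

#include <string>
#include <vector>
#include "llama.h"

// Sketch: format a two-message chat with the model's built-in template
// (tmpl == nullptr). Assumes the llama.h signature of this era.
std::string format_chat(const llama_model * model, bool add_ass) {
    std::vector<llama_chat_message> chat = {
        { "system", "You are a helpful assistant." },
        { "user",   "Hello!"                       },
    };

    std::vector<char> buf(1024);
    int32_t res = llama_chat_apply_template(model, nullptr, chat.data(), chat.size(),
                                            add_ass, buf.data(), (int32_t) buf.size());
    if (res > (int32_t) buf.size()) {
        buf.resize(res);   // first buffer was too small; retry at the reported size
        res = llama_chat_apply_template(model, nullptr, chat.data(), chat.size(),
                                        add_ass, buf.data(), (int32_t) buf.size());
    }
    return res < 0 ? std::string() : std::string(buf.data(), res);
}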