From fd74b5ea3489928ad09083f80a7e556c292667b8 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Thu, 7 Mar 2024 10:17:10 +0200
Subject: [PATCH] server : simplify json parsing + add comment about t_last

---
 examples/server/server.cpp | 26 +++++++------------------
 llama.cpp                  |  2 ++
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index eeaf36f1b..835e1f858 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -764,6 +764,7 @@ struct llama_server_context {
 
     server_slot * get_slot(int id) {
         int64_t t_last = ggml_time_us();
+
         server_slot * last_used = nullptr;
 
         for (server_slot & slot : slots) {
@@ -771,6 +772,7 @@ struct llama_server_context {
                 return &slot;
             }
 
+            // among all available slots, find the one that has been least recently used
            if (slot.available() && slot.t_last_used < t_last) {
                 last_used = &slot;
                 t_last = slot.t_last_used;
@@ -832,23 +834,9 @@ struct llama_server_context {
         }
 
         // infill
-        if (data.count("input_prefix") != 0) {
-            slot.params.input_prefix = data["input_prefix"];
-        } else {
-            slot.params.input_prefix = "";
-        }
-
-        if (data.count("input_suffix") != 0) {
-            slot.params.input_suffix = data["input_suffix"];
-        } else {
-            slot.params.input_suffix = "";
-        }
-
-        if (data.count("prompt") != 0) {
-            slot.prompt = data["prompt"];
-        } else {
-            slot.prompt = "";
-        }
+        slot.params.input_prefix = json_value(data, "input_prefix", default_params.input_prefix);
+        slot.params.input_suffix = json_value(data, "input_suffix", default_params.input_suffix);
+        slot.prompt = json_value(data, "prompt", std::string(""));
 
         // penalize user-provided tokens
         {
@@ -1563,8 +1551,8 @@ struct llama_server_context {
         // release slots
         for (auto & slot : slots) {
             if (slot.command == RELEASE) {
-                slot.state = IDLE;
-                slot.command = NONE;
+                slot.state       = IDLE;
+                slot.command     = NONE;
                 slot.t_last_used = ggml_time_us();
 
                 LOG_INFO("slot released", {
diff --git a/llama.cpp b/llama.cpp
index f95328601..d93b62cf0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13533,12 +13533,14 @@ LLAMA_API int32_t llama_chat_apply_template(
             curr_tmpl = std::string(model_template.data(), model_template.size());
         }
     }
+
     // format the chat to string
     std::vector<const llama_chat_message *> chat_vec;
     chat_vec.resize(n_msg);
     for (size_t i = 0; i < n_msg; i++) {
         chat_vec[i] = &chat[i];
     }
+
     std::string formatted_chat;
     int32_t res = llama_chat_apply_template_internal(curr_tmpl, chat_vec, formatted_chat, add_ass);
     if (res < 0) {