From fd74b5ea3489928ad09083f80a7e556c292667b8 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Thu, 7 Mar 2024 10:17:10 +0200
Subject: [PATCH] server : simplify json parsing + add comment about t_last

---
 examples/server/server.cpp | 26 +++++++------------------
 llama.cpp                  |  2 ++
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index eeaf36f1b..835e1f858 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -764,6 +764,7 @@ struct llama_server_context {
 
     server_slot * get_slot(int id) {
         int64_t t_last = ggml_time_us();
+
         server_slot * last_used = nullptr;
 
         for (server_slot & slot : slots) {
@@ -771,6 +772,7 @@ struct llama_server_context {
                 return &slot;
             }
 
+            // among all available slots, find the one that has been least recently used
            if (slot.available() && slot.t_last_used < t_last) {
                 last_used = &slot;
                 t_last = slot.t_last_used;
@@ -832,23 +834,9 @@ struct llama_server_context {
         }
 
         // infill
-        if (data.count("input_prefix") != 0) {
-            slot.params.input_prefix = data["input_prefix"];
-        } else {
-            slot.params.input_prefix = "";
-        }
-
-        if (data.count("input_suffix") != 0) {
-            slot.params.input_suffix = data["input_suffix"];
-        } else {
-            slot.params.input_suffix = "";
-        }
-
-        if (data.count("prompt") != 0) {
-            slot.prompt = data["prompt"];
-        } else {
-            slot.prompt = "";
-        }
+        slot.params.input_prefix = json_value(data, "input_prefix", default_params.input_prefix);
+        slot.params.input_suffix = json_value(data, "input_suffix", default_params.input_suffix);
+        slot.prompt = json_value(data, "prompt", std::string(""));
 
         // penalize user-provided tokens
         {
@@ -1563,8 +1551,8 @@ struct llama_server_context {
         // release slots
         for (auto & slot : slots) {
             if (slot.command == RELEASE) {
-                slot.state = IDLE;
-                slot.command = NONE;
+                slot.state       = IDLE;
+                slot.command     = NONE;
                 slot.t_last_used = ggml_time_us();
 
                 LOG_INFO("slot released", {
diff --git a/llama.cpp b/llama.cpp
index f95328601..d93b62cf0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13533,12 +13533,14 @@ LLAMA_API int32_t llama_chat_apply_template(
             curr_tmpl = std::string(model_template.data(), model_template.size());
         }
     }
+
     // format the chat to string
     std::vector<const llama_chat_message *> chat_vec;
     chat_vec.resize(n_msg);
     for (size_t i = 0; i < n_msg; i++) {
         chat_vec[i] = &chat[i];
     }
+
     std::string formatted_chat;
     int32_t res = llama_chat_apply_template_internal(curr_tmpl, chat_vec, formatted_chat, add_ass);
     if (res < 0) {