From 5d61ae8d2a5b807332b2216fdf602c85e4276714 Mon Sep 17 00:00:00 2001
From: pudepiedj
Date: Sat, 2 Mar 2024 10:24:07 +0000
Subject: [PATCH] Renaming some vars

---
 examples/server/server.cpp | 14 ++++++------
 examples/server/utils.hpp  | 47 ++------------------------------------
 2 files changed, 9 insertions(+), 52 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 5b8d1a9e3..e37da6133 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -253,8 +253,8 @@ struct server_slot {
         std::string stdout_reset;
         std::string stderr_target;
         std::string stderr_reset;
-        double t_token = t_prompt_processing / num_prompt_tokens_processed;
-        double n_tokens_second = 1e3 / t_prompt_processing * num_prompt_tokens_processed;
+        double t_token = t_prompt_processing / n_prompt_tokens_processed;
+        double n_tokens_second = 1e3 / t_prompt_processing * n_prompt_tokens_processed;
         //printf("\033[72;0H]");
         sprintf(buffer, "prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)",
                 t_prompt_processing, n_prompt_tokens_processed,
@@ -328,7 +328,7 @@ struct server_metrics {
 
 // experimental/diagostic graphic to show kvcache status
 // requires just `slots` and `params.n_ctx` as parameters
-static void kvgraphics(std::vector<llama_client_slot>& slots) {
+static void kvgraphics(std::vector<server_slot>& slots) {
 
     int max_length = 144;
     int num_blocks = slots.size();
@@ -604,7 +604,7 @@ struct llama_server_context
     // the logic seems wrong in this function
     // why should there be an id in a task matching a slot.id before a slot has been assigned?
     // most commonly id = -1 so we deal with that first rather than the specified id > 0
-    llama_client_slot* get_slot(int id) {
+    server_slot* get_slot(int id) {
 
         int64_t t_last = ggml_time_us();
         server_slot *last_used = nullptr;
@@ -1168,7 +1168,7 @@ struct llama_server_context
         queue_results.send(res);
     }
 
-    json get_formatted_generation(llama_client_slot &slot)
+    json get_formatted_generation(server_slot &slot)
     {
         const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
         const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
@@ -1270,7 +1270,7 @@ struct llama_server_context
             {"model", params.model_alias},
            {"tokens_predicted", slot.n_decoded},
            {"tokens_evaluated", slot.n_prompt_tokens},
-            {"generation_settings", get_formated_generation(slot)},
+            {"generation_settings", get_formatted_generation(slot)},
             {"prompt", slot.prompt},
             {"truncated", slot.truncated},
             {"stopped_eos", slot.stopped_eos},
@@ -1559,7 +1559,7 @@ struct llama_server_context
         int n_idle_slots = 0;
         int n_processing_slots = 0;
 
-        for (llama_client_slot &slot: slots) {
+        for (server_slot &slot: slots) {
             json slot_data = get_formatted_generation(slot);
             slot_data["id"] = slot.id;
             slot_data["task_id"] = slot.task_id;
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index ad6e4ca4d..4c7ade096 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -137,48 +137,6 @@ struct task_multi {
     std::vector<task_result> results{};
 };
 
-// TODO: can become bool if we can't find use of more states; MAYBE there is a case for RESERVED to keep slots dedicated to chats?
-enum slot_state
-{
-    IDLE,
-    PROCESSING,
-};
-
-enum slot_command
-{
-    NONE,
-    LOAD_PROMPT,
-    RELEASE,
-};
-
-struct slot_params
-{
-    bool stream = true;
-    bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
-
-    uint32_t seed = -1; // RNG seed
-    int32_t n_keep = 0; // number of tokens to keep from initial prompt
-    int32_t n_predict = -1; // new tokens to predict
-
-    std::vector<std::string> antiprompt;
-
-    json input_prefix;
-    json input_suffix;
-};
-
-struct slot_image
-{
-    int32_t id;
-
-    bool request_encode_image = false;
-    float * image_embedding = nullptr;
-    int32_t image_tokens = 0;
-
-    clip_image_u8 * img_data;
-
-    std::string prefix_prompt; // before of this image
-};
-
 // completion token output with probabilities
 struct completion_token_output {
     struct token_prob
@@ -198,7 +156,6 @@ struct token_translator {
     std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
 };
 
-static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra) {
 static inline void server_log(
     const char *level,
     const char *function,
     int line,
     const char *message,
@@ -211,8 +168,8 @@ static inline void server_log(
     std::string stdout_target,
     std::string stderr_target,
     std::string stdout_reset,
     std::string stderr_reset
-)
-{
+    )
+    {
     std::stringstream ss_tid;
     ss_tid << std::this_thread::get_id();
     json log = nlohmann::ordered_json{