From 12610861639c30201bf6071fc951fd5954bb2b2e Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Wed, 4 Dec 2024 19:36:37 +0100
Subject: [PATCH] minor style fix

---
 examples/server/server.cpp | 25 ++++++++++++++++---------
 examples/server/server.hpp |  9 +++++----
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 60947a17f..469663b2e 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1170,12 +1170,15 @@ struct server_context {
         server_task_result_cmpl_partial res;
         res.id    = slot.id_task;
         res.index = slot.index;
+        res.content = tkn.text_to_send;
+
+        res.truncated       = slot.truncated;
         res.n_decoded       = slot.n_decoded;
         res.n_prompt_tokens = slot.n_prompt_tokens;
 
-        res.content = tkn.text_to_send;
-        res.stop = slot.stop;
-        res.truncated = slot.truncated;
+        res.stop = slot.stop;
+
+        // populate res.probs_output
         if (slot.params.sampling.n_probs > 0) {
             const llama_tokens to_send_toks = common_tokenize(ctx, tkn.text_to_send, false);
             const size_t probs_pos = std::min(slot.n_sent_token_probs, slot.generated_token_probs.size());
@@ -1206,20 +1209,22 @@ struct server_context {
         server_task_result_cmpl_final res;
         res.id      = slot.id_task;
         res.id_slot = slot.id;
 
+        res.index   = slot.index;
         res.content = slot.generated_text;
+        res.timings = slot.get_timings();
+        res.model_alias = slot.oaicompat_model;
+        res.prompt      = common_detokenize(ctx, slot.prompt_tokens, true);
 
+        res.truncated       = slot.truncated;
         res.n_decoded       = slot.n_decoded;
         res.n_prompt_tokens = slot.n_prompt_tokens;
-        res.has_new_line = slot.has_new_line;
         res.n_tokens_cached = slot.n_past;
-        res.content = slot.generated_text;
+        res.has_new_line  = slot.has_new_line;
+        res.stopping_word = slot.stopping_word;
         res.stop = slot.stop;
-        res.truncated = slot.truncated;
-        res.timings = slot.get_timings();
-
-        res.generation_params = slot.params; // copy the parameters
 
+        // populate res.probs_output
         if (slot.params.sampling.n_probs > 0) {
             if (!slot.params.stream && slot.stop == STOP_TYPE_WORD) {
                 const llama_tokens stop_word_toks = common_tokenize(ctx, slot.stopping_word, false);
@@ -1235,6 +1240,8 @@
             }
         }
 
+        res.generation_params = slot.params; // copy the parameters
+
         queue_results.send(res);
     }
 
diff --git a/examples/server/server.hpp b/examples/server/server.hpp
index e9c94fa56..1e65614f6 100644
--- a/examples/server/server.hpp
+++ b/examples/server/server.hpp
@@ -237,7 +237,6 @@ struct server_task_result_cmpl_final : server_task_result {
     int index = 0;
     std::string content;
     bool stream;
-    bool timings_per_token;
    result_timings timings;
     std::string model_alias;
     std::string prompt;
@@ -245,10 +244,11 @@
     bool truncated;
     int32_t n_decoded;
     int32_t n_prompt_tokens;
-    int32_t has_new_line;
-    int32_t stopping_word;
     int32_t n_tokens_cached;
+    int32_t has_new_line;
+    std::string stopping_word;
     stop_type stop = STOP_TYPE_NONE;
+    std::vector<completion_token_output> probs_output;
 
     slot_params generation_params;
 
@@ -291,6 +291,7 @@ struct server_task_result_cmpl_partial : server_task_result {
     int32_t n_prompt_tokens;
 
     stop_type stop = STOP_TYPE_NONE;
+    std::vector<completion_token_output> probs_output;
 
     result_timings timings;
 
@@ -346,7 +347,7 @@ struct server_task_result_embd : server_task_result {
 struct server_task_result_rerank : server_task_result {
     server_task_result_rerank() : server_task_result(RESULT_TYPE_RERANK) {}
     int index = 0;
-    float score;
+    float score = -1e6;
 
     json to_json() {
         return json {