From 480089d00d4edf203b1564319c24adb922f609aa Mon Sep 17 00:00:00 2001
From: pudepiedj <pudepiedj@gmail.com>
Date: Sun, 3 Mar 2024 11:20:10 +0000
Subject: [PATCH] improve Llamaserver.py

---
 Llamaserver.py             | 24 ++++++++--
 examples/server/server.cpp | 12 +++--
 examples/server/utils.hpp  | 91 +++++++++++++++++++-------------------
 3 files changed, 74 insertions(+), 53 deletions(-)

diff --git a/Llamaserver.py b/Llamaserver.py
index b419e6942..33c615008 100644
--- a/Llamaserver.py
+++ b/Llamaserver.py
@@ -6,11 +6,21 @@ from time import sleep
 
 def print_dict(data):
     if isinstance(data, dict):
+        #for k, v in data.items():
+        #    print(f"Key: {k}; Value: {v}\n")
+        #input("",)
         for k, v in data.items():
             if isinstance(v, dict):
                 print_dict(v)
-            elif k == "content":          
-                print(f"Key: {k:>30}: {v}")
+            elif k == "content":
+                print(f"Model: {data['model']}")
+                print(f"Max tokens predicted: {data['generation_settings']['n_predict']}")
+                print(f"Prompt evaluation time = {data['timings']['prompt_ms']}")
+                print(f"Token generation time = {data['timings']['predicted_ms']}")
+                print(f"Tokens cached = {data['tokens_cached']}")
+                print(f"Tokens evaluated = {data['tokens_evaluated']}")
+                print(f"Tokens actually predicted = {data['tokens_predicted']}\n")
+                print(f"Response: {v}")
                 return
     elif isinstance(data, list):
         for entry in v:
@@ -55,7 +65,7 @@ def send_request(q, question, event, count, num_requests):
     
     system = "You are a helpful assistant who answers all requests \
 courteously and accurately without undue repetion. \
-you pay close attention to the nuance of a question and response accordingly."
+You pay close attention to the nuance of a question and respond accordingly."
     
     data = {'system': system, 'prompt': question}
     
@@ -131,9 +141,15 @@ if __name__ == "__main__":
                     "Israel", "Egypt", "Kenya", "Chile", "Mexico", "Canada", "Ecuador", "Brazil", "Argentina", "Colombia",
                     "Bulgaria", "Romania", "Finland", "Sweden", "Norway", "Denmark", "Tanzania", "Israel",
                     "Latvia", "Lithuania", "Estonia", "Pakistan", "Sri Lanka", "Malawi", "Mozambique"]
+
+    philosopher_list = ["Blaise Pascal", "Thomas Hobbes", "Georg Frederik Hegel", "Søren Kierkegaard", "Karl Marx", "Arthur Schopenhauer",
+                        "Ludwig Feuerbach", "Friedrich Nietzsche", "Max Weber", "Sigmund Freud", "Carl Jung",
+                        "Melanie Klein", "John Puddefoot"]
+
+    num_requests = len(philosopher_list)
     
     for i in range(num_requests):
-        writer = writer_list[i % len(writer_list)]
+        writer = philosopher_list[i % num_requests]
         question = f"Tell me about the writings of {writer}."
         # NOTE: don't pass the parameter as a function call; pass in args
         print(f"Processing request {i} / {num_requests}: {question}\n")
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index d237d2e66..4a5a875c4 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -390,7 +390,8 @@ static void kvgraphics(std::vector<server_slot>& slots) {
         } else {
             slot_symbol3 = "\u22EE";
         }
-    printf(" %4zu/%5zu %2d %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str());
+        std::string prompt = slots[i].prompt.dump();
+    printf(" %4zu/%5zu %2d %s %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str(), prompt.c_str());
     }
     printf("\033[5;0H");   // just start two lines below the heading
     //printf("\n\033[%d;0H\033[%dJ", 10, num_blocks+5);     // move cursor to end of cache display and clear thereafter
@@ -2284,6 +2285,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
             sparams.public_path = argv[i];
         }
         /*
+        Do we really need to be able to feed a single user to the server?
         else if (arg == "--api-key")
         {
             if (++i >= argc)
@@ -2310,7 +2312,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                 invalid_param = true;
                 break;
             }
-            sparams.api_keys = get_userdata(argv[i]);
+            sparams.api_keys = get_userdata(argv[i]);   // read apikey json data
 
             key_file.close();
         }
@@ -3152,14 +3154,16 @@ int main(int argc, char **argv)
     // Set the base directory for serving static files
     svr.set_base_dir(sparams.public_path);
 
+    // gather the hostname and port into log_data
     std::unordered_map<std::string, std::string> log_data;
     log_data["hostname"] = sparams.hostname;
     log_data["port"] = std::to_string(sparams.port);
 
-    if (sparams.api_keys.size() == 1) {     // what happens if the size is zero?
+    // process api keys
+    if (sparams.api_keys.size() == 1) {     // should we trap what happens if the size is zero?
         log_data["api_key"] = "api_key: ****" + sparams.api_keys[0][0].substr(sparams.api_keys[0][0].length() - 4);
     } else if (sparams.api_keys.size() > 1) {
-        log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
+        log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; // diagnostic; suppress eventually
     }
     for (auto &item : sparams.api_keys) {
         std::string username = item.first;
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index dfd38e2ce..9af0ffaf4 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -20,48 +20,6 @@ extern bool server_log_json;
 #define SERVER_VERBOSE 1
 #endif
 
-#if SERVER_VERBOSE != 1
-#define LOG_VERBOSE(MSG, ...)   // if not verbose logging just return empty
-#else
-#define LOG_VERBOSE(MSG, ...)                                            \
-    do                                                                   \
-    {                                                                    \
-        if (server_verbose)                                              \
-        {                                                                \
-            server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__,     \
-               log_settings.stdout_target, log_settings.stderr_target, \
-               log_settings.stdout_reset, log_settings.stderr_reset);   \
-        }                                                                \
-    } while (0)     // this is always false so the loop only compiles once but is treated as a single statement
-#endif
-
-// ATTEMPT TO REFACTOR THE LOGGING BEHAVIOUR AND ALLOW REDIRECTION OF STDOUT, STDERR
-
-struct LogRedirection {
-  // Set default values for redirection targets and reset strings
-  std::string stdout_target = "stdout_log.log";
-  std::string stdout_reset = "/dev/stdout";
-  std::string stderr_target = "stderr_log.log";
-  std::string stderr_reset = "/dev/stderr";
-};
-
-LogRedirection log_settings;
-
-#define LOG_ERROR(MSG, ...) \
-    server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__, \
-               log_settings.stdout_target, log_settings.stderr_target, \
-               log_settings.stdout_reset, log_settings.stderr_reset)
-
-#define LOG_WARNING(MSG, ...) \
-    server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__, \
-               log_settings.stdout_target, log_settings.stderr_target, \
-               log_settings.stdout_reset, log_settings.stderr_reset)
-
-#define LOG_INFO(MSG, ...) \
-    server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__, \
-               log_settings.stdout_target, log_settings.stderr_target, \
-               log_settings.stdout_reset, log_settings.stderr_reset)
-
 /*
 // Example usage (WIP):
 LogRedirection default_settings;  // Use defaults but not necessary to say so
@@ -78,7 +36,7 @@ Yes, using the LogRedirection struct approach eliminates the need to explicitly
 
 1. Redirection Settings Encapsulated: The LogRedirection struct holds these settings, making them reusable and adaptable.
 2. Default Values: The struct's members have default values defined, serving as fallbacks.
-3. Macro Handles Settings: The LOG_ERROR macro takes a LogRedirection object and passes its members to the server_log function.
+3. Macro Handles Settings: The LOG_ERROR (etc.) macros take a LogRedirection object and pass its members to the server_log function.
 
 Example:
 
@@ -87,15 +45,58 @@ LOG_ERROR("Default error", {});  // Uses defaults from an empty LogRedirection o
 This compact usage is possible because:
 
 {} creates a temporary LogRedirection object with its members implicitly initialized to the default values.
-The macro passes those defaults to server_log, achieving the desired behavior without requiring explicit variable declarations at every call.
+The macro passes those defaults to server_log, achieving the desired behaviour without requiring explicit variable declarations at every call.
 Customization:
 
-When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR for tailored logging behaviour:
+When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR (etc.) for tailored logging behaviour:
 
 LogRedirection custom_settings = {.stdout_target = "/tmp/my_log.out"};
 LOG_ERROR("Custom error", "Details", custom_settings);
 */
 
+// ATTEMPT TO REFACTOR THE LOGGING BEHAVIOUR AND ALLOW REDIRECTION OF STDOUT, STDERR
+
+struct LogRedirection {
+  // Set default values for redirection targets and reset strings
+  std::string stdout_target = "stdout.log"; // will be in ./build and eventually overwritten
+  std::string stdout_reset = "/dev/stdout";
+  std::string stderr_target = "stderr.log"; // will be in ./build and eventually overwritten
+  std::string stderr_reset = "/dev/stderr";
+};
+
+LogRedirection log_settings;    // TODO: avoid global declaration
+
+#if SERVER_VERBOSE != 1
+#define LOG_VERBOSE(MSG, ...)   // verbose logging compiled out: the macro expands to nothing
+#else
+#define LOG_VERBOSE(MSG, ...)                                            \
+    do                                                                   \
+    {                                                                    \
+        if (server_verbose)                                              \
+        {                                                                \
+            server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__,     \
+               log_settings.stdout_target, log_settings.stderr_target, \
+               log_settings.stdout_reset, log_settings.stderr_reset);   \
+        }                                                                \
+    } while (0)     // while (0) is always false, so the body runs exactly once and the macro expands to a single statement
+#endif
+
+#define LOG_ERROR(MSG, ...) \
+    server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__, \
+               log_settings.stdout_target, log_settings.stderr_target, \
+               log_settings.stdout_reset, log_settings.stderr_reset)
+
+#define LOG_WARNING(MSG, ...) \
+    server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__, \
+               log_settings.stdout_target, log_settings.stderr_target, \
+               log_settings.stdout_reset, log_settings.stderr_reset)
+
+#define LOG_INFO(MSG, ...) \
+    server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__, \
+               log_settings.stdout_target, log_settings.stderr_target, \
+               log_settings.stdout_reset, log_settings.stderr_reset)
+
+
 //
 // parallel
 //