improve Llamaserver.py

2024-03-03 11:20:10 +00:00 · 2024-03-03 11:20:10 +00:00 · 480089d00d
commit 480089d00d
parent 265741aa0f
3 changed files with 74 additions and 53 deletions
--- a/Llamaserver.py
+++ b/Llamaserver.py
@ -6,11 +6,21 @@ from time import sleep

 def print_dict(data):
    if isinstance(data, dict):
+        #for k, v in data.items():
+        #    print(f"Key: {k}; Value: {v}\n")
+        #input("",)
        for k, v in data.items():
            if isinstance(v, dict):
                print_dict(v)
            elif k == "content":
-                print(f"Key: {k:>30}: {v}")
+                print(f"Model: {data['model']}")
+                print(f"Max tokens predicted: {data['generation_settings']['n_predict']}")
+                print(f"Prompt evaluation time = {data['timings']['prompt_ms']}")
+                print(f"Token generation time = {data['timings']['predicted_ms']}")
+                print(f"Tokens cached = {data['tokens_cached']}")
+                print(f"Tokens evaluated = {data['tokens_evaluated']}")
+                print(f"Tokens actually predicted = {data['tokens_predicted']}\n")
+                print(f"Response: {v}")
                return
    elif isinstance(data, list):
        for entry in v:
@ -55,7 +65,7 @@ def send_request(q, question, event, count, num_requests):
    
    system = "You are a helpful assistant who answers all requests \
 courteously and accurately without undue repetion. \
-you pay close attention to the nuance of a question and response accordingly."
+You pay close attention to the nuance of a question and respond accordingly."
    
    data = {'system': system, 'prompt': question}
    
@ -132,8 +142,14 @@ if __name__ == "__main__":
                    "Bulgaria", "Romania", "Finland", "Sweden", "Norway", "Denmark", "Tanzania", "Israel",
                    "Latvia", "Lithuania", "Estonia", "Pakistan", "Sri Lanka", "Malawi", "Mozambique"]

+    philosopher_list = ["Blaise Pascal", "Thomas Hobbes", "Georg Frederik Hegel", "Søren Kierkegaard", "Karl Marx", "Arthur Schopenhauer",
+                        "Ludwig Feuerbach", "Friedrich Nietzsche", "Max Weber", "Sigmund Freud", "Carl Jung",
+                        "Melanie Klein", "John Puddefoot"]
+
+    num_requests = len(philosopher_list)
+    
    for i in range(num_requests):
-        writer = writer_list[i % len(writer_list)]
+        writer = philosopher_list[i % num_requests]
        question = f"Tell me about the writings of {writer}."
        # NOTE: don't pass the parameter as a function call; pass in args
        print(f"Processing request {i} / {num_requests}: {question}\n")
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -390,7 +390,8 @@ static void kvgraphics(std::vector<server_slot>& slots) {
        } else {
            slot_symbol3 = "\u22EE";
        }
-    printf(" %4zu/%5zu %2d %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str());
+        std::string prompt = slots[i].prompt.dump();
+    printf(" %4zu/%5zu %2d %s %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str(), prompt.c_str());
    }
    printf("\033[5;0H");   // just start two lines below the heading
    //printf("\n\033[%d;0H\033[%dJ", 10, num_blocks+5);     // move cursor to end of cache display and clear thereafter
@ -2284,6 +2285,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
            sparams.public_path = argv[i];
        }
        /*
+        Do we really need to be able to feed a single user to the server?
        else if (arg == "--api-key")
        {
            if (++i >= argc)
@ -2310,7 +2312,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                invalid_param = true;
                break;
            }
-            sparams.api_keys = get_userdata(argv[i]);
+            sparams.api_keys = get_userdata(argv[i]);   // read apikey json data

            key_file.close();
        }
@ -3152,14 +3154,16 @@ int main(int argc, char **argv)
    // Set the base directory for serving static files
    svr.set_base_dir(sparams.public_path);

+    // set the host port to listen on
    std::unordered_map<std::string, std::string> log_data;
    log_data["hostname"] = sparams.hostname;
    log_data["port"] = std::to_string(sparams.port);

-    if (sparams.api_keys.size() == 1) {     // what happens if the size is zero?
+    // process api keys
+    if (sparams.api_keys.size() == 1) {     // should we trap what happens if the size is zero?
        log_data["api_key"] = "api_key: ****" + sparams.api_keys[0][0].substr(sparams.api_keys[0][0].length() - 4);
    } else if (sparams.api_keys.size() > 1) {
-        log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
+        log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; // diagnostic; suppress eventually
    }
    for (auto &item : sparams.api_keys) {
        std::string username = item.first;
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@ -20,48 +20,6 @@ extern bool server_log_json;
 #define SERVER_VERBOSE 1
 #endif

-#if SERVER_VERBOSE != 1
-#define LOG_VERBOSE(MSG, ...)   // if not verbose logging just return empty
-#else
-#define LOG_VERBOSE(MSG, ...)                                            \
-    do                                                                   \
-    {                                                                    \
-        if (server_verbose)                                              \
-        {                                                                \
-            server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__,     \
-               log_settings.stdout_target, log_settings.stderr_target, \
-               log_settings.stdout_reset, log_settings.stderr_reset);   \
-        }                                                                \
-    } while (0)     // this is always false so the loop only compiles once but is treated as a single statement
-#endif
-
-// ATTEMPT TO REFACTOR THE LOGGING BEHAVIOUR AND ALLOW REDIRECTION OF STDOUT, STDERR
-
-struct LogRedirection {
-  // Set default values for redirection targets and reset strings
-  std::string stdout_target = "stdout_log.log";
-  std::string stdout_reset = "/dev/stdout";
-  std::string stderr_target = "stderr_log.log";
-  std::string stderr_reset = "/dev/stderr";
-};
-
-LogRedirection log_settings;
-
-#define LOG_ERROR(MSG, ...) \
-    server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__, \
-               log_settings.stdout_target, log_settings.stderr_target, \
-               log_settings.stdout_reset, log_settings.stderr_reset)
-
-#define LOG_WARNING(MSG, ...) \
-    server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__, \
-               log_settings.stdout_target, log_settings.stderr_target, \
-               log_settings.stdout_reset, log_settings.stderr_reset)
-
-#define LOG_INFO(MSG, ...) \
-    server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__, \
-               log_settings.stdout_target, log_settings.stderr_target, \
-               log_settings.stdout_reset, log_settings.stderr_reset)
-
 /*
 // Example usage (WIP):
 LogRedirection default_settings;  // Use defaults but not necessary to say so
@ -78,7 +36,7 @@ Yes, using the LogRedirection struct approach eliminates the need to explicitly

 1. Redirection Settings Encapsulated: The LogRedirection struct holds these settings, making them reusable and adaptable.
 2. Default Values: The struct's members have default values defined, serving as fallbacks.
-3. Macro Handles Settings: The LOG_ERROR macro takes a LogRedirection object and passes its members to the server_log function.
+3. Macro Handles Settings: The LOG_ERROR (etc.) macros take a LogRedirection object and pass its members to the server_log function.

 Example:

@ -87,15 +45,58 @@ LOG_ERROR("Default error", {});  // Uses defaults from an empty LogRedirection o
 This compact usage is possible because:

 {} creates a temporary LogRedirection object with its members implicitly initialized to the default values.
-The macro passes those defaults to server_log, achieving the desired behavior without requiring explicit variable declarations at every call.
+The macro passes those defaults to server_log, achieving the desired behaviour without requiring explicit variable declarations at every call.
 Customization:

-When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR for tailored logging behaviour:
+When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR (etc.) for tailored logging behaviour:

 LogRedirection custom_settings = {.stdout_target = "/tmp/my_log.out"};
 LOG_ERROR("Custom error", "Details", custom_settings);
 */

+// ATTEMPT TO REFACTOR THE LOGGING BEHAVIOUR AND ALLOW REDIRECTION OF STDOUT, STDERR
+
+struct LogRedirection {
+  // Set default values for redirection targets and reset strings
+  std::string stdout_target = "stdout.log"; // will be in ./build and eventually overwritten
+  std::string stdout_reset = "/dev/stdout";
+  std::string stderr_target = "stderr.log"; // will be in ./build and eventually overwritten
+  std::string stderr_reset = "/dev/stderr";
+};
+
+LogRedirection log_settings;    // TODO: avoid global declaration
+
+#if SERVER_VERBOSE != 1
+#define LOG_VERBOSE(MSG, ...)   // if not verbose logging just return empty
+#else
+#define LOG_VERBOSE(MSG, ...)                                            \
+    do                                                                   \
+    {                                                                    \
+        if (server_verbose)                                              \
+        {                                                                \
+            server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__,     \
+               log_settings.stdout_target, log_settings.stderr_target, \
+               log_settings.stdout_reset, log_settings.stderr_reset);   \
+        }                                                                \
+    } while (0)     // the condition is always false, so the body executes exactly once while the macro still behaves as a single statement
+#endif
+
+#define LOG_ERROR(MSG, ...) \
+    server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__, \
+               log_settings.stdout_target, log_settings.stderr_target, \
+               log_settings.stdout_reset, log_settings.stderr_reset)
+
+#define LOG_WARNING(MSG, ...) \
+    server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__, \
+               log_settings.stdout_target, log_settings.stderr_target, \
+               log_settings.stdout_reset, log_settings.stderr_reset)
+
+#define LOG_INFO(MSG, ...) \
+    server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__, \
+               log_settings.stdout_target, log_settings.stderr_target, \
+               log_settings.stdout_reset, log_settings.stderr_reset)
+
+
 //
 // parallel
 //