improve Llamaserver.py

2024-03-03 11:20:10 +00:00 · 2024-03-03 11:20:10 +00:00 · 480089d00d
commit 480089d00d
parent 265741aa0f
3 changed files with 74 additions and 53 deletions
--- a/Llamaserver.py
+++ b/Llamaserver.py
@ -6,11 +6,21 @@ from time import sleep
 def print_dict(data):
    if isinstance(data, dict):
        #for k, v in data.items():
        #    print(f"Key: {k}; Value: {v}\n")
        #input("",)
        for k, v in data.items():
            if isinstance(v, dict):
                print_dict(v)
-            elif k == "content":          
+            elif k == "content":
-                print(f"Key: {k:>30}: {v}")
+                print(f"Model: {data['model']}")
                print(f"Max tokens predicted: {data['generation_settings']['n_predict']}")
                print(f"Prompt evaluation time = {data['timings']['prompt_ms']}")
                print(f"Token generation time = {data['timings']['predicted_ms']}")
                print(f"Tokens cached = {data['tokens_cached']}")
                print(f"Tokens evaluated = {data['tokens_evaluated']}")
                print(f"Tokens actually predicted = {data['tokens_predicted']}\n")
                print(f"Response: {v}")
                return
    elif isinstance(data, list):
        for entry in v:
@ -55,7 +65,7 @@ def send_request(q, question, event, count, num_requests):
    system = "You are a helpful assistant who answers all requests \
 courteously and accurately without undue repetion. \
-you pay close attention to the nuance of a question and response accordingly."
+You pay close attention to the nuance of a question and respond accordingly."
    data = {'system': system, 'prompt': question}
@ -131,9 +141,15 @@ if __name__ == "__main__":
                    "Israel", "Egypt", "Kenya", "Chile", "Mexico", "Canada", "Ecuador", "Brazil", "Argentina", "Colombia",
                    "Bulgaria", "Romania", "Finland", "Sweden", "Norway", "Denmark", "Tanzania", "Israel",
                    "Latvia", "Lithuania", "Estonia", "Pakistan", "Sri Lanka", "Malawi", "Mozambique"]
    philosopher_list = ["Blaise Pascal", "Thomas Hobbes", "Georg Frederik Hegel", "Søren Kierkegaard", "Karl Marx", "Arthur Schopenhauer",
                        "Ludwig Feuerbach", "Friedrich Nietzsche", "Max Weber", "Sigmund Freud", "Carl Jung",
                        "Melanie Klein", "John Puddefoot"]
    num_requests = len(philosopher_list)
    for i in range(num_requests):
-        writer = writer_list[i % len(writer_list)]
+        writer = philosopher_list[i % num_requests]
        question = f"Tell me about the writings of {writer}."
        # NOTE: don't pass the parameter as a function call; pass in args
        print(f"Processing request {i} / {num_requests}: {question}\n")
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -390,7 +390,8 @@ static void kvgraphics(std::vector<server_slot>& slots) {
        } else {
            slot_symbol3 = "\u22EE";
        }
-    printf(" %4zu/%5zu %2d %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str());
+        std::string prompt = slots[i].prompt.dump();
    printf(" %4zu/%5zu %2d %s %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str(), prompt.c_str());
    }
    printf("\033[5;0H");   // just start two lines below the heading
    //printf("\n\033[%d;0H\033[%dJ", 10, num_blocks+5);     // move cursor to end of cache display and clear thereafter
@ -2284,6 +2285,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
            sparams.public_path = argv[i];
        }
        /*
        Do we really need to be able to feed a single user to the server?
        else if (arg == "--api-key")
        {
            if (++i >= argc)
@ -2310,7 +2312,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                invalid_param = true;
                break;
            }
-            sparams.api_keys = get_userdata(argv[i]);
+            sparams.api_keys = get_userdata(argv[i]);   // read apikey json data
            key_file.close();
        }
@ -3152,14 +3154,16 @@ int main(int argc, char **argv)
    // Set the base directory for serving static files
    svr.set_base_dir(sparams.public_path);
    // set the host port to listen on
    std::unordered_map<std::string, std::string> log_data;
    log_data["hostname"] = sparams.hostname;
    log_data["port"] = std::to_string(sparams.port);
-    if (sparams.api_keys.size() == 1) {     // what happens if the size is zero?
+    // process api keys
    if (sparams.api_keys.size() == 1) {     // should we trap what happens if the size is zero?
        log_data["api_key"] = "api_key: ****" + sparams.api_keys[0][0].substr(sparams.api_keys[0][0].length() - 4);
    } else if (sparams.api_keys.size() > 1) {
-        log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
+        log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; // diagnostic; suppress eventually
    }
    for (auto &item : sparams.api_keys) {
        std::string username = item.first;
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@ -20,48 +20,6 @@ extern bool server_log_json;
 #define SERVER_VERBOSE 1
 #endif
 #if SERVER_VERBOSE != 1
 #define LOG_VERBOSE(MSG, ...)   // if not verbose logging just return empty
 #else
 #define LOG_VERBOSE(MSG, ...)                                            \
    do                                                                   \
    {                                                                    \
        if (server_verbose)                                              \
        {                                                                \
            server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__,     \
               log_settings.stdout_target, log_settings.stderr_target, \
               log_settings.stdout_reset, log_settings.stderr_reset);   \
        }                                                                \
    } while (0)     // this is always false so the loop only compiles once but is treated as a single statement
 #endif
 // ATTEMPT TO REFACTOR THE LOGGING BEHAVIOUR AND ALLOW REDIRECTION OF STDOUT, STDERR
 struct LogRedirection {
  // Set default values for redirection targets and reset strings
  std::string stdout_target = "stdout_log.log";
  std::string stdout_reset = "/dev/stdout";
  std::string stderr_target = "stderr_log.log";
  std::string stderr_reset = "/dev/stderr";
 };
 LogRedirection log_settings;
 #define LOG_ERROR(MSG, ...) \
    server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__, \
               log_settings.stdout_target, log_settings.stderr_target, \
               log_settings.stdout_reset, log_settings.stderr_reset)
 #define LOG_WARNING(MSG, ...) \
    server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__, \
               log_settings.stdout_target, log_settings.stderr_target, \
               log_settings.stdout_reset, log_settings.stderr_reset)
 #define LOG_INFO(MSG, ...) \
    server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__, \
               log_settings.stdout_target, log_settings.stderr_target, \
               log_settings.stdout_reset, log_settings.stderr_reset)
 /*
 // Example usage (WIP):
 LogRedirection default_settings;  // Use defaults but not necessary to say so
@ -78,7 +36,7 @@ Yes, using the LogRedirection struct approach eliminates the need to explicitly
 1. Redirection Settings Encapsulated: The LogRedirection struct holds these settings, making them reusable and adaptable.
 2. Default Values: The struct's members have default values defined, serving as fallbacks.
-3. Macro Handles Settings: The LOG_ERROR macro takes a LogRedirection object and passes its members to the server_log function.
+3. Macro Handles Settings: The LOG_ERROR (etc.) macros take a LogRedirection object and pass its members to the server_log function.
 Example:
@ -87,15 +45,58 @@ LOG_ERROR("Default error", {});  // Uses defaults from an empty LogRedirection o
 This compact usage is possible because:
 {} creates a temporary LogRedirection object with its members implicitly initialized to the default values.
-The macro passes those defaults to server_log, achieving the desired behavior without requiring explicit variable declarations at every call.
+The macro passes those defaults to server_log, achieving the desired behaviour without requiring explicit variable declarations at every call.
 Customization:
-When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR for tailored logging behaviour:
+When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR (etc.) for tailored logging behaviour:
 LogRedirection custom_settings = {.stdout_target = "/tmp/my_log.out"};
 LOG_ERROR("Custom error", "Details", custom_settings);
 */
 // ATTEMPT TO REFACTOR THE LOGGING BEHAVIOUR AND ALLOW REDIRECTION OF STDOUT, STDERR
 struct LogRedirection {
  // Set default values for redirection targets and reset strings
  std::string stdout_target = "stdout.log"; // will be in ./build and eventually overwritten
  std::string stdout_reset = "/dev/stdout";
  std::string stderr_target = "stderr.log"; // will be in ./build and eventually overwritten
  std::string stderr_reset = "/dev/stderr";
 };
 LogRedirection log_settings;    // TODO: avoid global declaration
 #if SERVER_VERBOSE != 1
 #define LOG_VERBOSE(MSG, ...)   // if not verbose logging just return empty
 #else
 #define LOG_VERBOSE(MSG, ...)                                            \
    do                                                                   \
    {                                                                    \
        if (server_verbose)                                              \
        {                                                                \
            server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__,     \
               log_settings.stdout_target, log_settings.stderr_target, \
               log_settings.stdout_reset, log_settings.stderr_reset);   \
        }                                                                \
    } while (0)     // the condition is always false, so the body executes exactly once while the macro still expands to a single statement
 #endif
 #define LOG_ERROR(MSG, ...) \
    server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__, \
               log_settings.stdout_target, log_settings.stderr_target, \
               log_settings.stdout_reset, log_settings.stderr_reset)
 #define LOG_WARNING(MSG, ...) \
    server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__, \
               log_settings.stdout_target, log_settings.stderr_target, \
               log_settings.stdout_reset, log_settings.stderr_reset)
 #define LOG_INFO(MSG, ...) \
    server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__, \
               log_settings.stdout_target, log_settings.stderr_target, \
               log_settings.stdout_reset, log_settings.stderr_reset)
 //
 // parallel
 //