From 480089d00d4edf203b1564319c24adb922f609aa Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Sun, 3 Mar 2024 11:20:10 +0000 Subject: [PATCH] improve Llamaserver.py --- Llamaserver.py | 24 ++++++++-- examples/server/server.cpp | 12 +++-- examples/server/utils.hpp | 91 +++++++++++++++++++------------------- 3 files changed, 74 insertions(+), 53 deletions(-) diff --git a/Llamaserver.py b/Llamaserver.py index b419e6942..33c615008 100644 --- a/Llamaserver.py +++ b/Llamaserver.py @@ -6,11 +6,21 @@ from time import sleep def print_dict(data): if isinstance(data, dict): + #for k, v in data.items(): + # print(f"Key: {k}; Value: {v}\n") + #input("",) for k, v in data.items(): if isinstance(v, dict): print_dict(v) - elif k == "content": - print(f"Key: {k:>30}: {v}") + elif k == "content": + print(f"Model: {data['model']}") + print(f"Max tokens predicted: {data['generation_settings']['n_predict']}") + print(f"Prompt evaluation time = {data['timings']['prompt_ms']}") + print(f"Token generation time = {data['timings']['predicted_ms']}") + print(f"Tokens cached = {data['tokens_cached']}") + print(f"Tokens evaluated = {data['tokens_evaluated']}") + print(f"Tokens actually predicted = {data['tokens_predicted']}\n") + print(f"Response: {v}") return elif isinstance(data, list): for entry in v: @@ -55,7 +65,7 @@ def send_request(q, question, event, count, num_requests): system = "You are a helpful assistant who answers all requests \ courteously and accurately without undue repetion. \ -you pay close attention to the nuance of a question and response accordingly." +You pay close attention to the nuance of a question and respond accordingly." 
data = {'system': system, 'prompt': question} @@ -131,9 +141,15 @@ if __name__ == "__main__": "Israel", "Egypt", "Kenya", "Chile", "Mexico", "Canada", "Ecuador", "Brazil", "Argentina", "Colombia", "Bulgaria", "Romania", "Finland", "Sweden", "Norway", "Denmark", "Tanzania", "Israel", "Latvia", "Lithuania", "Estonia", "Pakistan", "Sri Lanka", "Malawi", "Mozambique"] + + philosopher_list = ["Blaise Pascal", "Thomas Hobbes", "Georg Frederik Hegel", "Søren Kierkegaard", "Karl Marx", "Arthur Schopenhauer", + "Ludwig Feuerbach", "Friedrich Nietzsche", "Max Weber", "Sigmund Freud", "Carl Jung", + "Melanie Klein", "John Puddefoot"] + + num_requests = len(philosopher_list) for i in range(num_requests): - writer = writer_list[i % len(writer_list)] + writer = philosopher_list[i % num_requests] question = f"Tell me about the writings of {writer}." # NOTE: don't pass the parameter as a function call; pass in args print(f"Processing request {i} / {num_requests}: {question}\n") diff --git a/examples/server/server.cpp b/examples/server/server.cpp index d237d2e66..4a5a875c4 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -390,7 +390,8 @@ static void kvgraphics(std::vector& slots) { } else { slot_symbol3 = "\u22EE"; } - printf(" %4zu/%5zu %2d %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str()); + std::string prompt = slots[i].prompt.dump(); + printf(" %4zu/%5zu %2d %s %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str(), prompt.c_str()); } printf("\033[5;0H"); // just start two lines below the heading //printf("\n\033[%d;0H\033[%dJ", 10, num_blocks+5); // move cursor to end of cache display and clear thereafter @@ -2284,6 +2285,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, sparams.public_path = argv[i]; } /* + Do we really need to be able to feed a 
single user to the server? else if (arg == "--api-key") { if (++i >= argc) @@ -2310,7 +2312,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, invalid_param = true; break; } - sparams.api_keys = get_userdata(argv[i]); + sparams.api_keys = get_userdata(argv[i]); // read apikey json data key_file.close(); } @@ -3152,14 +3154,16 @@ int main(int argc, char **argv) // Set the base directory for serving static files svr.set_base_dir(sparams.public_path); + // set the host port to listen on std::unordered_map log_data; log_data["hostname"] = sparams.hostname; log_data["port"] = std::to_string(sparams.port); - if (sparams.api_keys.size() == 1) { // what happens if the size is zero? + // process api keys + if (sparams.api_keys.size() == 1) { // should we trap what happens if the size is zero? log_data["api_key"] = "api_key: ****" + sparams.api_keys[0][0].substr(sparams.api_keys[0][0].length() - 4); } else if (sparams.api_keys.size() > 1) { - log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; + log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; // diagnostic; suppress eventually } for (auto &item : sparams.api_keys) { std::string username = item.first; diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index dfd38e2ce..9af0ffaf4 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -20,48 +20,6 @@ extern bool server_log_json; #define SERVER_VERBOSE 1 #endif -#if SERVER_VERBOSE != 1 -#define LOG_VERBOSE(MSG, ...) // if not verbose logging just return empty -#else -#define LOG_VERBOSE(MSG, ...) 
\ - do \ - { \ - if (server_verbose) \ - { \ - server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__, \ - log_settings.stdout_target, log_settings.stderr_target, \ - log_settings.stdout_reset, log_settings.stderr_reset); \ - } \ - } while (0) // this is always false so the loop only compiles once but is treated as a single statement -#endif - -// ATTEMPT TO REFACTOR THE LOGGING BEHAVIOUR AND ALLOW REDIRECTION OF STDOUT, STDERR - -struct LogRedirection { - // Set default values for redirection targets and reset strings - std::string stdout_target = "stdout_log.log"; - std::string stdout_reset = "/dev/stdout"; - std::string stderr_target = "stderr_log.log"; - std::string stderr_reset = "/dev/stderr"; -}; - -LogRedirection log_settings; - -#define LOG_ERROR(MSG, ...) \ - server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__, \ - log_settings.stdout_target, log_settings.stderr_target, \ - log_settings.stdout_reset, log_settings.stderr_reset) - -#define LOG_WARNING(MSG, ...) \ - server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__, \ - log_settings.stdout_target, log_settings.stderr_target, \ - log_settings.stdout_reset, log_settings.stderr_reset) - -#define LOG_INFO(MSG, ...) \ - server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__, \ - log_settings.stdout_target, log_settings.stderr_target, \ - log_settings.stdout_reset, log_settings.stderr_reset) - /* // Example usage (WIP): LogRedirection default_settings; // Use defaults but not necessary to say so @@ -78,7 +36,7 @@ Yes, using the LogRedirection struct approach eliminates the need to explicitly 1. Redirection Settings Encapsulated: The LogRedirection struct holds these settings, making them reusable and adaptable. 2. Default Values: The struct's members have default values defined, serving as fallbacks. -3. Macro Handles Settings: The LOG_ERROR macro takes a LogRedirection object and passes its members to the server_log function. +3. Macro Handles Settings: The LOG_ERROR (etc.) 
macros take a LogRedirection object and pass its members to the server_log function. Example: @@ -87,15 +45,58 @@ LOG_ERROR("Default error", {}); // Uses defaults from an empty LogRedirection o This compact usage is possible because: {} creates a temporary LogRedirection object with its members implicitly initialized to the default values. -The macro passes those defaults to server_log, achieving the desired behavior without requiring explicit variable declarations at every call. +The macro passes those defaults to server_log, achieving the desired behaviour without requiring explicit variable declarations at every call. Customization: -When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR for tailored logging behaviour: +When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR (etc.) for tailored logging behaviour: LogRedirection custom_settings = {.stdout_target = "/tmp/my_log.out"}; LOG_ERROR("Custom error", "Details", custom_settings); */ +// ATTEMPT TO REFACTOR THE LOGGING BEHAVIOUR AND ALLOW REDIRECTION OF STDOUT, STDERR + +struct LogRedirection { + // Set default values for redirection targets and reset strings + std::string stdout_target = "stdout.log"; // will be in ./build and eventually overwritten + std::string stdout_reset = "/dev/stdout"; + std::string stderr_target = "stderr.log"; // will be in ./build and eventually overwritten + std::string stderr_reset = "/dev/stderr"; +}; + +LogRedirection log_settings; // TODO: avoid global declaration + +#if SERVER_VERBOSE != 1 +#define LOG_VERBOSE(MSG, ...) // if not verbose logging just return empty +#else +#define LOG_VERBOSE(MSG, ...) 
\ + do \ + { \ + if (server_verbose) \ + { \ + server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__, \ + log_settings.stdout_target, log_settings.stderr_target, \ + log_settings.stdout_reset, log_settings.stderr_reset); \ + } \ + } while (0) // the condition is always false, so the body executes exactly once while the macro still expands to a single statement +#endif + +#define LOG_ERROR(MSG, ...) \ + server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__, \ + log_settings.stdout_target, log_settings.stderr_target, \ + log_settings.stdout_reset, log_settings.stderr_reset) + +#define LOG_WARNING(MSG, ...) \ + server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__, \ + log_settings.stdout_target, log_settings.stderr_target, \ + log_settings.stdout_reset, log_settings.stderr_reset) + +#define LOG_INFO(MSG, ...) \ + server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__, \ + log_settings.stdout_target, log_settings.stderr_target, \ + log_settings.stdout_reset, log_settings.stderr_reset) + + // // parallel //