improve Llamaserver.py

This commit is contained in:
pudepiedj 2024-03-03 11:20:10 +00:00
parent 265741aa0f
commit 480089d00d
3 changed files with 74 additions and 53 deletions

View file

@ -6,11 +6,21 @@ from time import sleep
def print_dict(data): def print_dict(data):
if isinstance(data, dict): if isinstance(data, dict):
#for k, v in data.items():
# print(f"Key: {k}; Value: {v}\n")
#input("",)
for k, v in data.items(): for k, v in data.items():
if isinstance(v, dict): if isinstance(v, dict):
print_dict(v) print_dict(v)
elif k == "content": elif k == "content":
print(f"Key: {k:>30}: {v}") print(f"Model: {data['model']}")
print(f"Max tokens predicted: {data['generation_settings']['n_predict']}")
print(f"Prompt evaluation time = {data['timings']['prompt_ms']}")
print(f"Token generation time = {data['timings']['predicted_ms']}")
print(f"Tokens cached = {data['tokens_cached']}")
print(f"Tokens evaluated = {data['tokens_evaluated']}")
print(f"Tokens actually predicted = {data['tokens_predicted']}\n")
print(f"Response: {v}")
return return
elif isinstance(data, list): elif isinstance(data, list):
for entry in v: for entry in v:
@ -55,7 +65,7 @@ def send_request(q, question, event, count, num_requests):
system = "You are a helpful assistant who answers all requests \ system = "You are a helpful assistant who answers all requests \
courteously and accurately without undue repetion. \ courteously and accurately without undue repetion. \
you pay close attention to the nuance of a question and response accordingly." You pay close attention to the nuance of a question and respond accordingly."
data = {'system': system, 'prompt': question} data = {'system': system, 'prompt': question}
@ -132,8 +142,14 @@ if __name__ == "__main__":
"Bulgaria", "Romania", "Finland", "Sweden", "Norway", "Denmark", "Tanzania", "Israel", "Bulgaria", "Romania", "Finland", "Sweden", "Norway", "Denmark", "Tanzania", "Israel",
"Latvia", "Lithuania", "Estonia", "Pakistan", "Sri Lanka", "Malawi", "Mozambique"] "Latvia", "Lithuania", "Estonia", "Pakistan", "Sri Lanka", "Malawi", "Mozambique"]
philosopher_list = ["Blaise Pascal", "Thomas Hobbes", "Georg Frederik Hegel", "Søren Kierkegaard", "Karl Marx", "Arthur Schopenhauer",
"Ludwig Feuerbach", "Friedrich Nietzsche", "Max Weber", "Sigmund Freud", "Carl Jung",
"Melanie Klein", "John Puddefoot"]
num_requests = len(philosopher_list)
for i in range(num_requests): for i in range(num_requests):
writer = writer_list[i % len(writer_list)] writer = philosopher_list[i % num_requests]
question = f"Tell me about the writings of {writer}." question = f"Tell me about the writings of {writer}."
# NOTE: don't pass the parameter as a function call; pass in args # NOTE: don't pass the parameter as a function call; pass in args
print(f"Processing request {i} / {num_requests}: {question}\n") print(f"Processing request {i} / {num_requests}: {question}\n")

View file

@ -390,7 +390,8 @@ static void kvgraphics(std::vector<server_slot>& slots) {
} else { } else {
slot_symbol3 = "\u22EE"; slot_symbol3 = "\u22EE";
} }
printf(" %4zu/%5zu %2d %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str()); std::string prompt = slots[i].prompt.dump();
printf(" %4zu/%5zu %2d %s %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str(), prompt.c_str());
} }
printf("\033[5;0H"); // just start two lines below the heading printf("\033[5;0H"); // just start two lines below the heading
//printf("\n\033[%d;0H\033[%dJ", 10, num_blocks+5); // move cursor to end of cache display and clear thereafter //printf("\n\033[%d;0H\033[%dJ", 10, num_blocks+5); // move cursor to end of cache display and clear thereafter
@ -2284,6 +2285,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
sparams.public_path = argv[i]; sparams.public_path = argv[i];
} }
/* /*
Do we really need to be able to feed a single user to the server?
else if (arg == "--api-key") else if (arg == "--api-key")
{ {
if (++i >= argc) if (++i >= argc)
@ -2310,7 +2312,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
invalid_param = true; invalid_param = true;
break; break;
} }
sparams.api_keys = get_userdata(argv[i]); sparams.api_keys = get_userdata(argv[i]); // read apikey json data
key_file.close(); key_file.close();
} }
@ -3152,14 +3154,16 @@ int main(int argc, char **argv)
// Set the base directory for serving static files // Set the base directory for serving static files
svr.set_base_dir(sparams.public_path); svr.set_base_dir(sparams.public_path);
// set the host port to listen on
std::unordered_map<std::string, std::string> log_data; std::unordered_map<std::string, std::string> log_data;
log_data["hostname"] = sparams.hostname; log_data["hostname"] = sparams.hostname;
log_data["port"] = std::to_string(sparams.port); log_data["port"] = std::to_string(sparams.port);
if (sparams.api_keys.size() == 1) { // what happens if the size is zero? // process api keys
if (sparams.api_keys.size() == 1) { // should we trap what happens if the size is zero?
log_data["api_key"] = "api_key: ****" + sparams.api_keys[0][0].substr(sparams.api_keys[0][0].length() - 4); log_data["api_key"] = "api_key: ****" + sparams.api_keys[0][0].substr(sparams.api_keys[0][0].length() - 4);
} else if (sparams.api_keys.size() > 1) { } else if (sparams.api_keys.size() > 1) {
log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; // diagnostic; suppress eventually
} }
for (auto &item : sparams.api_keys) { for (auto &item : sparams.api_keys) {
std::string username = item.first; std::string username = item.first;

View file

@ -20,48 +20,6 @@ extern bool server_log_json;
#define SERVER_VERBOSE 1 #define SERVER_VERBOSE 1
#endif #endif
// LOG_VERBOSE(MSG, ...): when SERVER_VERBOSE is 1, forwards a "VERB" record to
// server_log (with the calling function name, line number and the four strings
// held in log_settings), but only if server_verbose is true at the call site.
// For any other SERVER_VERBOSE value the macro expands to nothing.
#if SERVER_VERBOSE == 1
#define LOG_VERBOSE(MSG, ...)                                                  \
    do                                                                         \
    {                                                                          \
        if (!server_verbose)                                                   \
        {                                                                      \
            break;                                                             \
        }                                                                      \
        server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__,               \
                   log_settings.stdout_target, log_settings.stderr_target,     \
                   log_settings.stdout_reset, log_settings.stderr_reset);      \
    } while (0) // the condition is always false, so the body runs once and the expansion is a single statement
#else
#define LOG_VERBOSE(MSG, ...) // verbose logging disabled: expands to nothing
#endif
// Work-in-progress logging refactor: allow stdout and stderr to be redirected.
// LogRedirection holds the default redirection targets and reset strings.
struct LogRedirection {
    std::string stdout_target{"stdout_log.log"};
    std::string stdout_reset{"/dev/stdout"};
    std::string stderr_target{"stderr_log.log"};
    std::string stderr_reset{"/dev/stderr"};
};

// Settings object whose members the LOG_* macros pass to server_log.
LogRedirection log_settings;
// Shared expansion for the level-specific macros below: calls server_log with
// the given level tag, the calling function name and line number, the message
// and extra arguments, followed by the four strings held in log_settings.
#define LOG_WITH_REDIRECTION(LEVEL, MSG, ...)                              \
    server_log(LEVEL, __func__, __LINE__, MSG, __VA_ARGS__,                \
               log_settings.stdout_target, log_settings.stderr_target,     \
               log_settings.stdout_reset, log_settings.stderr_reset)

// Level-specific entry points; each only fixes the level tag.
#define LOG_ERROR(MSG, ...)   LOG_WITH_REDIRECTION("ERR", MSG, __VA_ARGS__)
#define LOG_WARNING(MSG, ...) LOG_WITH_REDIRECTION("WARN", MSG, __VA_ARGS__)
#define LOG_INFO(MSG, ...)    LOG_WITH_REDIRECTION("INFO", MSG, __VA_ARGS__)
/* /*
// Example usage (WIP): // Example usage (WIP):
LogRedirection default_settings; // Use defaults but not necessary to say so LogRedirection default_settings; // Use defaults but not necessary to say so
@ -78,7 +36,7 @@ Yes, using the LogRedirection struct approach eliminates the need to explicitly
1. Redirection Settings Encapsulated: The LogRedirection struct holds these settings, making them reusable and adaptable. 1. Redirection Settings Encapsulated: The LogRedirection struct holds these settings, making them reusable and adaptable.
2. Default Values: The struct's members have default values defined, serving as fallbacks. 2. Default Values: The struct's members have default values defined, serving as fallbacks.
3. Macro Handles Settings: The LOG_ERROR macro takes a LogRedirection object and passes its members to the server_log function. 3. Macro Handles Settings: The LOG_ERROR (etc.) macros take a LogRedirection object and pass its members to the server_log function.
Example: Example:
@ -87,15 +45,58 @@ LOG_ERROR("Default error", {}); // Uses defaults from an empty LogRedirection o
This compact usage is possible because: This compact usage is possible because:
{} creates a temporary LogRedirection object with its members implicitly initialized to the default values. {} creates a temporary LogRedirection object with its members implicitly initialized to the default values.
The macro passes those defaults to server_log, achieving the desired behavior without requiring explicit variable declarations at every call. The macro passes those defaults to server_log, achieving the desired behaviour without requiring explicit variable declarations at every call.
Customization: Customization:
When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR for tailored logging behaviour: When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR (etc.) for tailored logging behaviour:
LogRedirection custom_settings = {.stdout_target = "/tmp/my_log.out"}; LogRedirection custom_settings = {.stdout_target = "/tmp/my_log.out"};
LOG_ERROR("Custom error", "Details", custom_settings); LOG_ERROR("Custom error", "Details", custom_settings);
*/ */
// Work-in-progress logging refactor: allow stdout and stderr to be redirected.
// LogRedirection holds the default redirection targets and reset strings.
struct LogRedirection {
    std::string stdout_target{"stdout.log"};  // will be in ./build and eventually overwritten
    std::string stdout_reset{"/dev/stdout"};
    std::string stderr_target{"stderr.log"};  // will be in ./build and eventually overwritten
    std::string stderr_reset{"/dev/stderr"};
};

// Settings object whose members the LOG_* macros pass to server_log.
// TODO: avoid global declaration
LogRedirection log_settings;
// LOG_VERBOSE(MSG, ...): when SERVER_VERBOSE is 1, forwards a "VERB" record to
// server_log (with the calling function name, line number and the four strings
// held in log_settings), but only if server_verbose is true at the call site.
// For any other SERVER_VERBOSE value the macro expands to nothing.
#if SERVER_VERBOSE == 1
#define LOG_VERBOSE(MSG, ...)                                                  \
    do                                                                         \
    {                                                                          \
        if (!server_verbose)                                                   \
        {                                                                      \
            break;                                                             \
        }                                                                      \
        server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__,               \
                   log_settings.stdout_target, log_settings.stderr_target,     \
                   log_settings.stdout_reset, log_settings.stderr_reset);      \
    } while (0) // the condition is always false, so the body runs once and the expansion is a single statement
#else
#define LOG_VERBOSE(MSG, ...) // verbose logging disabled: expands to nothing
#endif
// Shared expansion for the level-specific macros below: calls server_log with
// the given level tag, the calling function name and line number, the message
// and extra arguments, followed by the four strings held in log_settings.
#define LOG_WITH_REDIRECTION(LEVEL, MSG, ...)                              \
    server_log(LEVEL, __func__, __LINE__, MSG, __VA_ARGS__,                \
               log_settings.stdout_target, log_settings.stderr_target,     \
               log_settings.stdout_reset, log_settings.stderr_reset)

// Level-specific entry points; each only fixes the level tag.
#define LOG_ERROR(MSG, ...)   LOG_WITH_REDIRECTION("ERR", MSG, __VA_ARGS__)
#define LOG_WARNING(MSG, ...) LOG_WITH_REDIRECTION("WARN", MSG, __VA_ARGS__)
#define LOG_INFO(MSG, ...)    LOG_WITH_REDIRECTION("INFO", MSG, __VA_ARGS__)
// //
// parallel // parallel
// //