improve Llamaserver.py
This commit is contained in:
parent
265741aa0f
commit
480089d00d
3 changed files with 74 additions and 53 deletions
|
@ -6,11 +6,21 @@ from time import sleep
|
||||||
|
|
||||||
def print_dict(data):
|
def print_dict(data):
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
|
#for k, v in data.items():
|
||||||
|
# print(f"Key: {k}; Value: {v}\n")
|
||||||
|
#input("",)
|
||||||
for k, v in data.items():
|
for k, v in data.items():
|
||||||
if isinstance(v, dict):
|
if isinstance(v, dict):
|
||||||
print_dict(v)
|
print_dict(v)
|
||||||
elif k == "content":
|
elif k == "content":
|
||||||
print(f"Key: {k:>30}: {v}")
|
print(f"Model: {data['model']}")
|
||||||
|
print(f"Max tokens predicted: {data['generation_settings']['n_predict']}")
|
||||||
|
print(f"Prompt evaluation time = {data['timings']['prompt_ms']}")
|
||||||
|
print(f"Token generation time = {data['timings']['predicted_ms']}")
|
||||||
|
print(f"Tokens cached = {data['tokens_cached']}")
|
||||||
|
print(f"Tokens evaluated = {data['tokens_evaluated']}")
|
||||||
|
print(f"Tokens actually predicted = {data['tokens_predicted']}\n")
|
||||||
|
print(f"Response: {v}")
|
||||||
return
|
return
|
||||||
elif isinstance(data, list):
|
elif isinstance(data, list):
|
||||||
for entry in v:
|
for entry in v:
|
||||||
|
@ -55,7 +65,7 @@ def send_request(q, question, event, count, num_requests):
|
||||||
|
|
||||||
system = "You are a helpful assistant who answers all requests \
|
system = "You are a helpful assistant who answers all requests \
|
||||||
courteously and accurately without undue repetion. \
|
courteously and accurately without undue repetion. \
|
||||||
you pay close attention to the nuance of a question and response accordingly."
|
You pay close attention to the nuance of a question and respond accordingly."
|
||||||
|
|
||||||
data = {'system': system, 'prompt': question}
|
data = {'system': system, 'prompt': question}
|
||||||
|
|
||||||
|
@ -131,9 +141,15 @@ if __name__ == "__main__":
|
||||||
"Israel", "Egypt", "Kenya", "Chile", "Mexico", "Canada", "Ecuador", "Brazil", "Argentina", "Colombia",
|
"Israel", "Egypt", "Kenya", "Chile", "Mexico", "Canada", "Ecuador", "Brazil", "Argentina", "Colombia",
|
||||||
"Bulgaria", "Romania", "Finland", "Sweden", "Norway", "Denmark", "Tanzania", "Israel",
|
"Bulgaria", "Romania", "Finland", "Sweden", "Norway", "Denmark", "Tanzania", "Israel",
|
||||||
"Latvia", "Lithuania", "Estonia", "Pakistan", "Sri Lanka", "Malawi", "Mozambique"]
|
"Latvia", "Lithuania", "Estonia", "Pakistan", "Sri Lanka", "Malawi", "Mozambique"]
|
||||||
|
|
||||||
|
philosopher_list = ["Blaise Pascal", "Thomas Hobbes", "Georg Frederik Hegel", "Søren Kierkegaard", "Karl Marx", "Arthur Schopenhauer",
|
||||||
|
"Ludwig Feuerbach", "Friedrich Nietzsche", "Max Weber", "Sigmund Freud", "Carl Jung",
|
||||||
|
"Melanie Klein", "John Puddefoot"]
|
||||||
|
|
||||||
|
num_requests = len(philosopher_list)
|
||||||
|
|
||||||
for i in range(num_requests):
|
for i in range(num_requests):
|
||||||
writer = writer_list[i % len(writer_list)]
|
writer = philosopher_list[i % num_requests]
|
||||||
question = f"Tell me about the writings of {writer}."
|
question = f"Tell me about the writings of {writer}."
|
||||||
# NOTE: don't pass the parameter as a function call; pass in args
|
# NOTE: don't pass the parameter as a function call; pass in args
|
||||||
print(f"Processing request {i} / {num_requests}: {question}\n")
|
print(f"Processing request {i} / {num_requests}: {question}\n")
|
||||||
|
|
|
@ -390,7 +390,8 @@ static void kvgraphics(std::vector<server_slot>& slots) {
|
||||||
} else {
|
} else {
|
||||||
slot_symbol3 = "\u22EE";
|
slot_symbol3 = "\u22EE";
|
||||||
}
|
}
|
||||||
printf(" %4zu/%5zu %2d %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str());
|
std::string prompt = slots[i].prompt.dump();
|
||||||
|
printf(" %4zu/%5zu %2d %s %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str(), prompt.c_str());
|
||||||
}
|
}
|
||||||
printf("\033[5;0H"); // just start two lines below the heading
|
printf("\033[5;0H"); // just start two lines below the heading
|
||||||
//printf("\n\033[%d;0H\033[%dJ", 10, num_blocks+5); // move cursor to end of cache display and clear thereafter
|
//printf("\n\033[%d;0H\033[%dJ", 10, num_blocks+5); // move cursor to end of cache display and clear thereafter
|
||||||
|
@ -2284,6 +2285,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
||||||
sparams.public_path = argv[i];
|
sparams.public_path = argv[i];
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
|
Do we really need to be able to feed a single user to the server?
|
||||||
else if (arg == "--api-key")
|
else if (arg == "--api-key")
|
||||||
{
|
{
|
||||||
if (++i >= argc)
|
if (++i >= argc)
|
||||||
|
@ -2310,7 +2312,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
sparams.api_keys = get_userdata(argv[i]);
|
sparams.api_keys = get_userdata(argv[i]); // read apikey json data
|
||||||
|
|
||||||
key_file.close();
|
key_file.close();
|
||||||
}
|
}
|
||||||
|
@ -3152,14 +3154,16 @@ int main(int argc, char **argv)
|
||||||
// Set the base directory for serving static files
|
// Set the base directory for serving static files
|
||||||
svr.set_base_dir(sparams.public_path);
|
svr.set_base_dir(sparams.public_path);
|
||||||
|
|
||||||
|
// set the host port to listen on
|
||||||
std::unordered_map<std::string, std::string> log_data;
|
std::unordered_map<std::string, std::string> log_data;
|
||||||
log_data["hostname"] = sparams.hostname;
|
log_data["hostname"] = sparams.hostname;
|
||||||
log_data["port"] = std::to_string(sparams.port);
|
log_data["port"] = std::to_string(sparams.port);
|
||||||
|
|
||||||
if (sparams.api_keys.size() == 1) { // what happens if the size is zero?
|
// process api keys
|
||||||
|
if (sparams.api_keys.size() == 1) { // should we trap what happens if the size is zero?
|
||||||
log_data["api_key"] = "api_key: ****" + sparams.api_keys[0][0].substr(sparams.api_keys[0][0].length() - 4);
|
log_data["api_key"] = "api_key: ****" + sparams.api_keys[0][0].substr(sparams.api_keys[0][0].length() - 4);
|
||||||
} else if (sparams.api_keys.size() > 1) {
|
} else if (sparams.api_keys.size() > 1) {
|
||||||
log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
|
log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; // diagnostic; suppress eventually
|
||||||
}
|
}
|
||||||
for (auto &item : sparams.api_keys) {
|
for (auto &item : sparams.api_keys) {
|
||||||
std::string username = item.first;
|
std::string username = item.first;
|
||||||
|
|
|
@ -20,48 +20,6 @@ extern bool server_log_json;
|
||||||
#define SERVER_VERBOSE 1
|
#define SERVER_VERBOSE 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if SERVER_VERBOSE != 1
|
|
||||||
#define LOG_VERBOSE(MSG, ...) // if not verbose logging just return empty
|
|
||||||
#else
|
|
||||||
#define LOG_VERBOSE(MSG, ...) \
|
|
||||||
do \
|
|
||||||
{ \
|
|
||||||
if (server_verbose) \
|
|
||||||
{ \
|
|
||||||
server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__, \
|
|
||||||
log_settings.stdout_target, log_settings.stderr_target, \
|
|
||||||
log_settings.stdout_reset, log_settings.stderr_reset); \
|
|
||||||
} \
|
|
||||||
} while (0) // this is always false so the loop only compiles once but is treated as a single statement
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// ATTEMPT TO REFACTOR THE LOGGING BEHAVIOUR AND ALLOW REDIRECTION OF STDOUT, STDERR
|
|
||||||
|
|
||||||
struct LogRedirection {
|
|
||||||
// Set default values for redirection targets and reset strings
|
|
||||||
std::string stdout_target = "stdout_log.log";
|
|
||||||
std::string stdout_reset = "/dev/stdout";
|
|
||||||
std::string stderr_target = "stderr_log.log";
|
|
||||||
std::string stderr_reset = "/dev/stderr";
|
|
||||||
};
|
|
||||||
|
|
||||||
LogRedirection log_settings;
|
|
||||||
|
|
||||||
#define LOG_ERROR(MSG, ...) \
|
|
||||||
server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__, \
|
|
||||||
log_settings.stdout_target, log_settings.stderr_target, \
|
|
||||||
log_settings.stdout_reset, log_settings.stderr_reset)
|
|
||||||
|
|
||||||
#define LOG_WARNING(MSG, ...) \
|
|
||||||
server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__, \
|
|
||||||
log_settings.stdout_target, log_settings.stderr_target, \
|
|
||||||
log_settings.stdout_reset, log_settings.stderr_reset)
|
|
||||||
|
|
||||||
#define LOG_INFO(MSG, ...) \
|
|
||||||
server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__, \
|
|
||||||
log_settings.stdout_target, log_settings.stderr_target, \
|
|
||||||
log_settings.stdout_reset, log_settings.stderr_reset)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// Example usage (WIP):
|
// Example usage (WIP):
|
||||||
LogRedirection default_settings; // Use defaults but not necessary to say so
|
LogRedirection default_settings; // Use defaults but not necessary to say so
|
||||||
|
@ -78,7 +36,7 @@ Yes, using the LogRedirection struct approach eliminates the need to explicitly
|
||||||
|
|
||||||
1. Redirection Settings Encapsulated: The LogRedirection struct holds these settings, making them reusable and adaptable.
|
1. Redirection Settings Encapsulated: The LogRedirection struct holds these settings, making them reusable and adaptable.
|
||||||
2. Default Values: The struct's members have default values defined, serving as fallbacks.
|
2. Default Values: The struct's members have default values defined, serving as fallbacks.
|
||||||
3. Macro Handles Settings: The LOG_ERROR macro takes a LogRedirection object and passes its members to the server_log function.
|
3. Macro Handles Settings: The LOG_ERROR (etc.) macros take a LogRedirection object and pass its members to the server_log function.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
|
@ -87,15 +45,58 @@ LOG_ERROR("Default error", {}); // Uses defaults from an empty LogRedirection o
|
||||||
This compact usage is possible because:
|
This compact usage is possible because:
|
||||||
|
|
||||||
{} creates a temporary LogRedirection object with its members implicitly initialized to the default values.
|
{} creates a temporary LogRedirection object with its members implicitly initialized to the default values.
|
||||||
The macro passes those defaults to server_log, achieving the desired behavior without requiring explicit variable declarations at every call.
|
The macro passes those defaults to server_log, achieving the desired behaviour without requiring explicit variable declarations at every call.
|
||||||
Customization:
|
Customization:
|
||||||
|
|
||||||
When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR for tailored logging behaviour:
|
When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR (etc.) for tailored logging behaviour:
|
||||||
|
|
||||||
LogRedirection custom_settings = {.stdout_target = "/tmp/my_log.out"};
|
LogRedirection custom_settings = {.stdout_target = "/tmp/my_log.out"};
|
||||||
LOG_ERROR("Custom error", "Details", custom_settings);
|
LOG_ERROR("Custom error", "Details", custom_settings);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// ATTEMPT TO REFACTOR THE LOGGING BEHAVIOUR AND ALLOW REDIRECTION OF STDOUT, STDERR
|
||||||
|
|
||||||
|
struct LogRedirection {
|
||||||
|
// Set default values for redirection targets and reset strings
|
||||||
|
std::string stdout_target = "stdout.log"; // will be in ./build and eventually overwritten
|
||||||
|
std::string stdout_reset = "/dev/stdout";
|
||||||
|
std::string stderr_target = "stderr.log"; // will be in ./build and eventually overwritten
|
||||||
|
std::string stderr_reset = "/dev/stderr";
|
||||||
|
};
|
||||||
|
|
||||||
|
LogRedirection log_settings; // TODO: avoid global declaration
|
||||||
|
|
||||||
|
#if SERVER_VERBOSE != 1
|
||||||
|
#define LOG_VERBOSE(MSG, ...) // if not verbose logging just return empty
|
||||||
|
#else
|
||||||
|
#define LOG_VERBOSE(MSG, ...) \
|
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
if (server_verbose) \
|
||||||
|
{ \
|
||||||
|
server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__, \
|
||||||
|
log_settings.stdout_target, log_settings.stderr_target, \
|
||||||
|
log_settings.stdout_reset, log_settings.stderr_reset); \
|
||||||
|
} \
|
||||||
|
} while (0) // the condition is always false, so the body executes exactly once while the macro still expands to a single statement
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define LOG_ERROR(MSG, ...) \
|
||||||
|
server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__, \
|
||||||
|
log_settings.stdout_target, log_settings.stderr_target, \
|
||||||
|
log_settings.stdout_reset, log_settings.stderr_reset)
|
||||||
|
|
||||||
|
#define LOG_WARNING(MSG, ...) \
|
||||||
|
server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__, \
|
||||||
|
log_settings.stdout_target, log_settings.stderr_target, \
|
||||||
|
log_settings.stdout_reset, log_settings.stderr_reset)
|
||||||
|
|
||||||
|
#define LOG_INFO(MSG, ...) \
|
||||||
|
server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__, \
|
||||||
|
log_settings.stdout_target, log_settings.stderr_target, \
|
||||||
|
log_settings.stdout_reset, log_settings.stderr_reset)
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// parallel
|
// parallel
|
||||||
//
|
//
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue