From 54e4271487f8fbccd08c3624641bf2a5c3ba9c7f Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Sat, 24 Feb 2024 20:20:45 +0100
Subject: [PATCH] server: logs: allow choosing the log format, json or plain
 text

---
 examples/server/README.md  |  4 ++-
 examples/server/server.cpp | 23 +++++++++++++
 examples/server/utils.hpp  | 66 +++++++++++++++++++++++---------------
 3 files changed, 66 insertions(+), 27 deletions(-)

diff --git a/examples/server/README.md b/examples/server/README.md
index 0c43ac4c9..01091bb4c 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -39,9 +39,11 @@ see https://github.com/ggerganov/llama.cpp/issues/1437
 - `--mmproj MMPROJ_FILE`: Path to a multimodal projector file for LLaVA.
 - `--grp-attn-n`: Set the group attention factor to extend context size through self-extend(default: 1=disabled), used together with group attention width `--grp-attn-w`
 - `--grp-attn-w`: Set the group attention width to extend context size through self-extend(default: 512), used together with group attention factor `--grp-attn-n`
-- `-n, --n-predict`: Set the maximum tokens to predict (default: -1)
+- `-n N, --n-predict N`: Set the maximum tokens to predict (default: -1)
 - `--slots-endpoint-disable`: To disable slots state monitoring endpoint. Slots state may contain user data, prompts included.
 - `--chat-template JINJA_TEMPLATE`: Set custom jinja chat template. This parameter accepts a string, not a file name (default: template taken from model's metadata). We only support [some pre-defined templates](https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template)
+- `--log-disable`: Disable logging to a file; logs are written to stdout only.
+- `--log-format FORMAT`: Set the log output format to FORMAT, either `json` or `text` (default: `json`).
 
 ## Build
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index ed574344c..56dddd507 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -46,6 +46,7 @@ struct server_params
 };
 
 bool server_verbose = false;
+bool server_log_json = true;
 
 static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
 {
@@ -2037,6 +2038,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
     printf("  -ctv TYPE, --cache-type-v TYPE\n");
     printf("                            KV cache data type for V (default: f16)\n");
     printf("  --mmproj MMPROJ_FILE      path to a multimodal projector file for LLaVA.\n");
+    printf("  --log-format              log output format: json or text (default: json)\n");
     printf("  --log-disable             disables logging to a file.\n");
     printf("  --slots-endpoint-disable  disables slots monitoring endpoint.\n");
     printf("\n");
@@ -2489,6 +2491,27 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
             }
             params.mmproj = argv[i];
         }
+        else if (arg == "--log-format")
+        {
+            if (++i >= argc)
+            {
+                invalid_param = true;
+                break;
+            }
+            if (std::strcmp(argv[i], "json") == 0)
+            {
+                server_log_json = true;
+            }
+            else if (std::strcmp(argv[i], "text") == 0)
+            {
+                server_log_json = false;
+            }
+            else
+            {
+                invalid_param = true;
+                break;
+            }
+        }
         else if (arg == "--log-disable")
         {
             log_set_target(stdout);
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 9c60e38e4..6165d1cac 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -14,6 +14,7 @@
 using json = nlohmann::json;
 
 extern bool server_verbose;
+extern bool server_log_json;
 
 #ifndef SERVER_VERBOSE
 #define SERVER_VERBOSE 1
@@ -133,36 +134,49 @@ struct completion_token_output
     std::string text_to_send;
 };
 
-static inline void server_log(const char *level, const char *function, int /*line*/, const char *message, const nlohmann::ordered_json &extra)
+static void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra)
 {
-    char buf[1024];
-    snprintf(buf, 1024, "%24s %4s: %-80s", function, level, message);
+    std::stringstream ss_tid;
+    ss_tid << std::this_thread::get_id();
+    json log = nlohmann::ordered_json{
+        {"tid", ss_tid.str()},
+        {"timestamp", time(nullptr)},
+    };
 
-    nlohmann::ordered_json log;
+    if (server_log_json) {
+        log.merge_patch(
+            {
+                {"level", level},
+                {"function", function},
+                {"line", line},
+                {"msg", message},
+            });
+        if (!extra.empty()) {
+            log.merge_patch(extra);
+        }
 
-    {
-        std::stringstream ss_thread_id;
-        ss_thread_id << std::this_thread::get_id();
-        log.push_back({"tid", ss_thread_id.str()});
+        const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace);
+        printf("%.*s\n", (int)str.size(), str.data());
+        fflush(stdout);
+    } else {
+        char buf[1024];
+        snprintf(buf, 1024, "%24s %4s: %-80s", function, level, message);
+
+        if (!extra.empty()) {
+            log.merge_patch(extra);
+        }
+        std::stringstream ss;
+        ss << buf;
+        for (const auto& el : log.items())
+        {
+            snprintf(buf, 1024, " %s=%s", el.key().c_str(), el.value().dump().c_str());
+            ss << buf;
+        }
+
+        const std::string str = ss.str();
+        printf("%.*s\n", (int)str.size(), str.data());
+        fflush(stdout);
     }
-
-    log.merge_patch(
-        {
-            //{"timestamp", time(nullptr)},
-            //{"level", level},
-            //{"function", function},
-            //{"line", line},
-            {"msg", buf},
-        });
-
-    if (!extra.empty())
-    {
-        log.merge_patch(extra);
-    }
-
-    const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace);
-    printf("%.*s\n", (int)str.size(), str.data());
-    fflush(stdout);
 }
 
 //
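For reviewers who want to compare the two output formats without running the server, below is a minimal, self-contained sketch of the technique the patched `server_log()` uses: the shared fields are collected once in an `nlohmann::ordered_json` record, `merge_patch()` folds in the per-call fields, and a single boolean switches between a one-line JSON dump and a fixed-prefix `key=value` text rendering. The `demo_log` and `log_json` names and the `main()` driver are illustrative stand-ins for `server_log` and `server_log_json`, not part of the patch.

```cpp
// Standalone sketch of the dual-format logging above. Requires nlohmann/json
// (the server itself ships it as "json.hpp"); build with e.g.
//   g++ -std=c++11 demo.cpp -o demo
#include <cstdio>
#include <ctime>
#include <sstream>
#include <string>
#include <thread>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

static bool log_json = true; // stand-in for server_log_json

static void demo_log(const char *level, const char *function, int line,
                     const char *message, const nlohmann::ordered_json &extra)
{
    std::stringstream ss_tid;
    ss_tid << std::this_thread::get_id();

    // Fields common to both formats are collected once.
    nlohmann::ordered_json log = {
        {"tid", ss_tid.str()},
        {"timestamp", time(nullptr)},
    };

    if (log_json) {
        // JSON mode: fold the per-call fields into the record, dump one line.
        log.merge_patch({
            {"level", level},
            {"function", function},
            {"line", line},
            {"msg", message},
        });
        if (!extra.empty()) {
            log.merge_patch(extra);
        }
        const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace);
        printf("%s\n", str.c_str());
    } else {
        // Text mode: fixed-width "function level: message" prefix, then the
        // remaining record fields rendered as key=value pairs.
        char buf[1024];
        snprintf(buf, sizeof(buf), "%24s %4s: %-80s", function, level, message);
        if (!extra.empty()) {
            log.merge_patch(extra);
        }
        std::stringstream ss;
        ss << buf;
        for (const auto &el : log.items()) {
            ss << " " << el.key() << "=" << el.value().dump();
        }
        printf("%s\n", ss.str().c_str());
    }
    fflush(stdout);
}

int main() {
    demo_log("INFO", "main", __LINE__, "model loaded", {{"n_ctx", 4096}});
    log_json = false; // same call, text rendering
    demo_log("INFO", "main", __LINE__, "model loaded", {{"n_ctx", 4096}});
    return 0;
}
```

Building the record once and branching only at the serialization step is what keeps the two formats carrying the same fields; the text branch merely moves `level`, `function`, and `msg` into the fixed-width prefix instead of the JSON record.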