server: logs - unified format and --log-format option (#5700)

* server: logs - always use JSON logger, add add thread_id in message, log task_id and slot_id * server : skip GH copilot requests from logging * server : change message format of server_log() * server : no need to repeat log in comment * server : log style consistency * server : fix compile warning * server : fix tests regex patterns on M2 Ultra * server: logs: PR feedback on log level * server: logs: allow to choose log format in json or plain text * server: tests: output server logs in text * server: logs switch init logs to server logs macro * server: logs ensure value json value does not raised error * server: logs reduce level VERBOSE to VERB to max 4 chars * server: logs lower case as other log messages * server: logs avoid static in general Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * server: logs PR feedback: change text log format to: LEVEL [function_name] message | additional=data --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-02-25 13:50:32 +01:00 · 2024-02-25 13:50:32 +01:00 · 930b178026
commit 930b178026
parent d52d7819b8
6 changed files with 231 additions and 80 deletions
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@ -14,6 +14,7 @@
 using json = nlohmann::json;

 extern bool server_verbose;
+extern bool server_log_json;

 #ifndef SERVER_VERBOSE
 #define SERVER_VERBOSE 1
@ -27,14 +28,14 @@ extern bool server_verbose;
    {                                                                    \
        if (server_verbose)                                              \
        {                                                                \
-            server_log("VERBOSE", __func__, __LINE__, MSG, __VA_ARGS__); \
+            server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__); \
        }                                                                \
    } while (0)
 #endif

-#define LOG_ERROR(  MSG, ...) server_log("ERROR",   __func__, __LINE__, MSG, __VA_ARGS__)
-#define LOG_WARNING(MSG, ...) server_log("WARNING", __func__, __LINE__, MSG, __VA_ARGS__)
-#define LOG_INFO(   MSG, ...) server_log("INFO",    __func__, __LINE__, MSG, __VA_ARGS__)
+#define LOG_ERROR(  MSG, ...) server_log("ERR",  __func__, __LINE__, MSG, __VA_ARGS__)
+#define LOG_WARNING(MSG, ...) server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__)
+#define LOG_INFO(   MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)

 //
 // parallel
@ -133,26 +134,48 @@ struct completion_token_output
    std::string text_to_send;
 };

-static inline void server_log(const char *level, const char *function, int line,
-                       const char *message, const nlohmann::ordered_json &extra)
+static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra)
 {
-    nlohmann::ordered_json log
-    {
+    std::stringstream ss_tid;
+    ss_tid << std::this_thread::get_id();
+    json log = nlohmann::ordered_json{
+        {"tid", ss_tid.str()},
        {"timestamp", time(nullptr)},
-        {"level",     level},
-        {"function",  function},
-        {"line",      line},
-        {"message",   message},
    };

-    if (!extra.empty())
-    {
-        log.merge_patch(extra);
-    }
+    if (server_log_json) {
+        log.merge_patch(
+                {
+                        {"level",     level},
+                        {"function",  function},
+                        {"line",      line},
+                        {"msg",       message},
+                });
+        if (!extra.empty()) {
+            log.merge_patch(extra);
+        }

-    const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace);
-    printf("%.*s\n", (int)str.size(), str.data());
-    fflush(stdout);
+        std::cout << log.dump(-1, ' ', false, json::error_handler_t::replace) << "\n" << std::flush;
+    } else {
+        char buf[1024];
+        snprintf(buf, 1024, "%4s [%24s] %s", level, function, message);
+
+        if (!extra.empty()) {
+            log.merge_patch(extra);
+        }
+        std::stringstream ss;
+        ss << buf << " |";
+        for (const auto& el : log.items())
+        {
+            const std::string value = el.value().dump(-1, ' ', false, json::error_handler_t::replace);
+            snprintf(buf, 1024, " %s=%s", el.key().c_str(), value.c_str());
+            ss << buf;
+        }
+
+        const std::string str = ss.str();
+        printf("%.*s\n", (int)str.size(), str.data());
+        fflush(stdout);
+    }
 }

 //
@ -234,6 +257,7 @@ struct llama_server_queue {
        std::unique_lock<std::mutex> lock(mutex_tasks);
        if (task.id == -1) {
            task.id = id++;
+            LOG_VERBOSE("new task id", {{"new_id", task.id}});
        }
        queue_tasks.push_back(std::move(task));
        condition_tasks.notify_one();
@ -249,7 +273,9 @@ struct llama_server_queue {
    // Get the next id for creating anew task
    int get_new_id() {
        std::unique_lock<std::mutex> lock(mutex_tasks);
-        return id++;
+        int new_id = id++;
+        LOG_VERBOSE("new task id", {{"new_id", new_id}});
+        return new_id;
    }

    // Register function to process a new task
@ -290,8 +316,7 @@ struct llama_server_queue {
    void start_loop() {
        running = true;
        while (true) {
-            // new task arrived
-            LOG_VERBOSE("have new task", {});
+            LOG_VERBOSE("new task may arrive", {});
            {
                while (true)
                {
@ -303,7 +328,7 @@ struct llama_server_queue {
                    task_server task = queue_tasks.front();
                    queue_tasks.erase(queue_tasks.begin());
                    lock.unlock();
-                    LOG_VERBOSE("callback_new_task", {});
+                    LOG_VERBOSE("callback_new_task", {{"task_id", task.id}});
                    callback_new_task(task);
                }
                LOG_VERBOSE("callback_all_task_finished", {});
@ -384,11 +409,13 @@ struct llama_server_response {
    std::condition_variable condition_results;

    void add_waiting_task_id(int task_id) {
+        LOG_VERBOSE("waiting for task id", {{"task_id", task_id}});
        std::unique_lock<std::mutex> lock(mutex_results);
        waiting_task_ids.insert(task_id);
    }

    void remove_waiting_task_id(int task_id) {
+        LOG_VERBOSE("remove waiting for task id", {{"task_id", task_id}});
        std::unique_lock<std::mutex> lock(mutex_results);
        waiting_task_ids.erase(task_id);
    }
@ -401,7 +428,6 @@ struct llama_server_response {
            condition_results.wait(lock, [&]{
                return !queue_results.empty();
            });
-            LOG_VERBOSE("condition_results unblock", {});

            for (int i = 0; i < (int) queue_results.size(); i++)
            {
@ -426,20 +452,20 @@ struct llama_server_response {
    // Send a new result to a waiting task_id
    void send(task_result result) {
        std::unique_lock<std::mutex> lock(mutex_results);
-        LOG_VERBOSE("send new result", {});
+        LOG_VERBOSE("send new result", {{"task_id", result.id}});
        for (auto& task_id : waiting_task_ids) {
            // LOG_TEE("waiting task id %i \n", task_id);
            // for now, tasks that have associated parent multitasks just get erased once multitask picks up the result
            if (result.multitask_id == task_id)
            {
-                LOG_VERBOSE("callback_update_multitask", {});
+                LOG_VERBOSE("callback_update_multitask", {{"task_id", task_id}});
                callback_update_multitask(task_id, result.id, result);
                continue;
            }

            if (result.id == task_id)
            {
-                LOG_VERBOSE("queue_results.push_back", {});
+                LOG_VERBOSE("queue_results.push_back", {{"task_id", task_id}});
                queue_results.push_back(result);
                condition_results.notify_all();
                return;