utils.hpp refactored

2024-03-08 12:53:28 +00:00 · 2024-03-08 12:53:28 +00:00 · a6d2611624
commit a6d2611624
parent f44aa1f23a
1 changed files with 11 additions and 454 deletions
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@ -17,148 +17,27 @@ using json = nlohmann::json;
 extern bool server_verbose;
 extern bool server_log_json;
-#ifndef SERVER_VERBOSE          // if not defined externally above, make it true and enable verbose logging
+#ifndef SERVER_VERBOSE
 #define SERVER_VERBOSE 1
 #endif
 /*
 // Example usage (WIP):
 LogRedirection default_settings;  // Use defaults but not necessary to say so
 LogRedirection custom_settings;
 // Modify values for custom settings if needed
 log_settings.stdout_target = "/tmp/custom.log";
 LOG_ERROR("Default error", "Details", default_settings);    // can omit default_settings because it will default to log_settings unmodified
 LOG_ERROR("Custom error", "More info", custom_settings);
 Yes, using the LogRedirection struct approach eliminates the need to explicitly declare the extra variables for redirection targets and reset strings each time you call LOG_ERROR. Here's how it works:
 1. Redirection Settings Encapsulated: The LogRedirection struct holds these settings, making them reusable and adaptable.
 2. Default Values: The struct's members have default values defined, serving as fallbacks.
 3. Macro Handles Settings: The LOG_ERROR (etc.) macros take a LogRedirection object and passes its members to the server_log function.
 Example:
 LOG_ERROR("Default error", {});  // Uses defaults from an empty LogRedirection object
 This compact usage is possible because:
 {} creates a temporary LogRedirection object with its members implicitly initialized to the default values.
 The macro passes those defaults to server_log, achieving the desired behaviour without requiring explicit variable declarations at every call.
 Customization:
 When needed, you can create a LogRedirection object with specific values and pass it to LOG_ERROR (etc.) for tailored logging behaviour:
 LogRedirection custom_settings = {.stdout_target = "/tmp/my_log.out"};
 LOG_ERROR("Custom error", "Details", custom_settings);
 */
 // ATTEMPT TO REFACTOR THE LOGGING BEHAVIOUR AND ALLOW REDIRECTION OF STDOUT, STDERR
 struct LogRedirection {
  // Set default values for redirection targets and reset strings
  std::string stdout_target = "stdout.log"; // if a log it will be in ./build and eventually overwritten
  std::string stdout_reset = "/dev/stdout";
  std::string stderr_target = "stderr.log"; // will be in ./build and eventually overwritten
  std::string stderr_reset = "/dev/stderr";
 };
 LogRedirection log_settings;    // TODO: avoid global declaration
 #if SERVER_VERBOSE != 1
-#define LOG_VERBOSE(MSG, ...)   // if not verbose logging just return empty
+#define LOG_VERBOSE(MSG, ...)
 #else
 #define LOG_VERBOSE(MSG, ...)                                            \
    do                                                                   \
    {                                                                    \
        if (server_verbose)                                              \
        {                                                                \
-            server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__,     \
+            server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__); \
               log_settings.stdout_target, log_settings.stderr_target, \
               log_settings.stdout_reset, log_settings.stderr_reset);   \
        }                                                                \
-    } while (0)     // this is always false so the loop only compiles once but is treated as a single statement
+    } while (0)
 #endif
-#define LOG_ERROR(MSG, ...) \
+#define LOG_ERROR(  MSG, ...) server_log("ERR",  __func__, __LINE__, MSG, __VA_ARGS__)
-    server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__, \
+#define LOG_WARNING(MSG, ...) server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__)
-               log_settings.stdout_target, log_settings.stderr_target, \
+#define LOG_INFO(   MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
               log_settings.stdout_reset, log_settings.stderr_reset)
 #define LOG_WARNING(MSG, ...) \
    server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__, \
               log_settings.stdout_target, log_settings.stderr_target, \
               log_settings.stdout_reset, log_settings.stderr_reset)
 #define LOG_INFO(MSG, ...) \
    server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__, \
               log_settings.stdout_target, log_settings.stderr_target, \
               log_settings.stdout_reset, log_settings.stderr_reset)
 //
 // parallel
 //
 <<<<<<< HEAD
 enum server_state {
    SERVER_STATE_LOADING_MODEL,  // Server is starting up, model not fully loaded yet
    SERVER_STATE_READY,          // Server is ready and model is loaded
    SERVER_STATE_ERROR           // An error occurred, load_model failed
 };
 enum task_type {
    TASK_TYPE_COMPLETION,
    TASK_TYPE_CANCEL,
    TASK_TYPE_NEXT_RESPONSE,
    TASK_TYPE_METRICS
 };
 struct task_server {
    int id = -1; // for any instance, task id is not assigned yet; to be filled by llama_server_queue
    int target_id;
    task_type type;
    json data;
    bool infill_mode = false;
    bool embedding_mode = false;
    int multitask_id = -1;
 };
 struct task_result {
    int id;
    int multitask_id = -1;
    bool stop;
    bool error;
    json result_json;
 };
 struct task_multi {
    int id;
    std::set<int> subtasks_remaining{};
    std::vector<task_result> results{};
 };
 // completion token output with probabilities
 struct completion_token_output {
    struct token_prob
    {
        llama_token tok;
        float prob;
    };
    std::vector<token_prob> probs;
    llama_token tok;
    std::string text_to_send;
 };
 struct token_translator {
    llama_context * ctx;
    std::string operator()(llama_token tok)                    const { return llama_token_to_piece(ctx, tok); }
    std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
 };
 =======
 template <typename T>
 static T json_value(const json &body, const std::string &key, const T &default_value) {
    // Fallback null to default value
@ -166,22 +45,8 @@ static T json_value(const json &body, const std::string &key, const T &default_v
        ? body.value(key, default_value)
        : default_value;
 }
 >>>>>>> origin/master
-static inline void server_log(
+static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra) {
    const char *level,
    const char *function,
    int line,
    const char *message,
    const nlohmann::ordered_json &extra,
    // targets and resets for stdout and stderr are specified in LogRedirection above;
    // this allows different behaviour for LOG_ERROR, LOG_WARNING and LOG_INFO
    std::string stdout_target,
    std::string stderr_target,
    std::string stdout_reset,
    std::string stderr_reset
    )
    {
    std::stringstream ss_tid;
    ss_tid << std::this_thread::get_id();
    json log = nlohmann::ordered_json{
@ -189,28 +54,6 @@ static inline void server_log(
        {"timestamp", time(nullptr)},
    };
    // to allow the graphics to print to stdout we redirect non-graphical stdout to stderr
    // to silence the graphics we direct stdout to dev/null or don't initialise them
    /*
    FILE* new_stdout = freopen(stdout_target.c_str(), "a", stdout);
    if (new_stdout == nullptr) {
        std::cerr << "Error on redirecting stdout to " << stdout_target.c_str() << std::endl;
    } else {
        std::cerr << "Redirected stdout successfully to " << stdout_target.c_str() << std::endl;
    }
    */
    stdout_target = "";     // to silence the declaration of unused variable warnings
    stdout_reset = "";
    FILE* new_stderr = freopen(stderr_target.c_str(), "a", stderr);
    if (new_stderr == nullptr) {
        std::cerr << "Error on redirecting stderr to " << stderr_target.c_str() << std::endl;
    } else {
        std::cerr << "Redirected stderr successfully to " << stderr_target.c_str() << std::endl;
    }
    //freopen(stderr_target.c_str(), "a", stderr);      // we assign stderr to dev/null effectively 'blackholing' the output because log.dump below is redirected too
    if (server_log_json) {
        log.merge_patch( {
            {"level",    level},
@ -222,16 +65,11 @@ static inline void server_log(
        if (!extra.empty()) {
            log.merge_patch(extra);
        }
        std::cerr << log.dump(-1, ' ', false, json::error_handler_t::replace) << "\n" << std::flush;    // was originally std:cout
 <<<<<<< HEAD
    } else {                // store the logs in (because not json) text format
 =======
        printf("%s\n", log.dump(-1, ' ', false, json::error_handler_t::replace).c_str());
    } else {
 >>>>>>> origin/master
        char buf[1024];
-        snprintf(buf, 1024, "\033[85;0H%4s [%24s] %s", level, function, message);
+        snprintf(buf, 1024, "%4s [%24s] %s", level, function, message);
        if (!extra.empty()) {
            log.merge_patch(extra);
@ -245,25 +83,9 @@ static inline void server_log(
        }
        const std::string str = ss.str();
-        printf("\033[85;0H%.*s\n", (int)str.size(), str.data());
+        printf("%.*s\n", (int)str.size(), str.data());
-        fflush(stderr);                                                // was originally fflush(stdout)
+        fflush(stdout);
    }
    new_stderr = freopen(stderr_reset.c_str(), "a", stderr);
    if (new_stderr == nullptr) {
        std::cerr << "Error on resetting stderr to " << stderr_reset.c_str() << std::endl;
    }  else {
        std::cerr << "Reset stderr successfully to " << stderr_reset.c_str() << std::endl;
    }
    /*
    new_stdout = freopen(stdout_reset.c_str(), "a", stdout);
    if (new_stdout == nullptr) {
        std::cerr << "Error on resetting stdout to " << stdout_reset.c_str() << std::endl;
    } else {
        std::cerr << "Reset stdout successfully to " << stdout_reset.c_str() << std::endl;
    }
    */
 }
 //
@ -305,279 +127,14 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
        res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
    }
 <<<<<<< HEAD
    std::string formatted_chat(buf.data(), res);
    LOG_VERBOSE("formatted_chat", {"text", formatted_chat.c_str()});
 =======
    const std::string formatted_chat(buf.data(), res);
    LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}});
 >>>>>>> origin/master
    return formatted_chat;
 }
 //
 <<<<<<< HEAD
 // work queue utils
 //
 struct llama_server_queue {
    int id = 0;
    std::mutex mutex_tasks;
    bool running;
    // queues
    std::vector<task_server> queue_tasks;
    std::vector<task_server> queue_tasks_deferred;
    std::vector<task_multi> queue_multitasks;
    std::condition_variable condition_tasks;
    // callback functions
    std::function<void(task_server&)> callback_new_task;
    std::function<void(task_multi&)> callback_finish_multitask;
    std::function<void(void)> callback_run_slots;
    // Add a new task to the end of the queue
    int post(task_server task) {
        std::unique_lock<std::mutex> lock(mutex_tasks);
        if (task.id == -1) {
            task.id = id++;
            LOG_VERBOSE("new task id", {"new_id", task.id});
        }
        queue_tasks.push_back(std::move(task));
        //LOG("Queue now has %2zu members.\n", queue_tasks.size());
        condition_tasks.notify_one();
        return task.id;
    }
    // Add a new task, but defer until one slot is available
    void defer(task_server task) {
        std::unique_lock<std::mutex> lock(mutex_tasks);
        queue_tasks_deferred.push_back(std::move(task));
        LOG("Deferred queue now has %3zu members.\n", queue_tasks_deferred.size());
    }
    // Get the next id for creating a new task
    int get_new_id() {
        std::unique_lock<std::mutex> lock(mutex_tasks);
        int new_id = id++;
        LOG_VERBOSE("new task id", {"new_id", new_id});
        return new_id;
    }
    // Register function to process a new task
    void on_new_task(std::function<void(task_server&)> callback) {
        callback_new_task = callback;
    }
    // Register function to process a multitask when it is finished
    void on_finish_multitask(std::function<void(task_multi&)> callback) {
        callback_finish_multitask = callback;
    }
    // Register the function to be called when all slots data is ready to be processed
    void on_run_slots(std::function<void(void)> callback) {
        callback_run_slots = callback;
    }
    // Call when the state of one slot is changed
    void notify_slot_changed() {
        // move deferred tasks back to main loop
        // does this mean when ONE slot finished we move ALL deferred tasks back to the main queue? Why?
        // it seems that we move everything back to the main queue but we don't allocate a task to the slot just released
        // lock so nothing gets added while we are clearing the deferred queue
        std::unique_lock<std::mutex> lock(mutex_tasks);
        for (auto & task : queue_tasks_deferred) {
            queue_tasks.push_back(std::move(task));
        }
        queue_tasks_deferred.clear();   // and clear the deferred tasks completely?
    }
    // end the start_loop routine
    void terminate() {
        {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            running = false;
        }
        condition_tasks.notify_all();
    }
    /**
     * Main loop consists of these steps:
     * - Wait until a new task arrives
     * - Process the task (i.e. maybe copy data into slot)
     * - Check if multitask is finished
     * - Run all slots
     */
    void start_loop() {
        running = true;
        //LOG("In start_loop have new task number %d.\n", id);
        while (true) {
            LOG_VERBOSE("new task may arrive", {});
            {
                while (true)
                {
                    std::unique_lock<std::mutex> lock(mutex_tasks);
                    if (queue_tasks.empty()) {
                        lock.unlock();
                        break;
                    }
                    task_server task = queue_tasks.front();
                    queue_tasks.erase(queue_tasks.begin());
                    lock.unlock();
                    LOG_VERBOSE("callback_new_task", {"task_id", task.id});
                    callback_new_task(task);
                }
                LOG_VERBOSE("update_multitasks", {});
                // check if we have any finished multitasks
                auto queue_iterator = queue_multitasks.begin();
                while (queue_iterator != queue_multitasks.end())
                {
                    if (queue_iterator->subtasks_remaining.empty())
                    {
                        // all subtasks done == multitask is done
                        task_multi current_multitask = *queue_iterator;
                        callback_finish_multitask(current_multitask);
                        // remove this multitask
                        queue_iterator = queue_multitasks.erase(queue_iterator);
                    }
                    else
                    {
                        ++queue_iterator;
                    }
                }
                // all tasks in the current loop is processed, slots data is now ready
                LOG_VERBOSE("callback_run_slots", {});
                callback_run_slots();
            }
            LOG_VERBOSE("wait for new task", {});
            // wait for new task
            {
                std::unique_lock<std::mutex> lock(mutex_tasks);
                if (queue_tasks.empty()) {
                    if (!running) {
                        LOG_VERBOSE("ending start_loop", {});
                        return;
                    }
                    condition_tasks.wait(lock, [&]{
                        return (!queue_tasks.empty() || !running);
                    });
                }
            }
        }
    }
    //
    // functions to manage multitasks
    //
    // add a multitask by specifying the id of all subtask (subtask is a task_server)
    void add_multitask(int multitask_id, std::vector<int>& sub_ids)
    {
        std::lock_guard<std::mutex> lock(mutex_tasks);
        task_multi multi;
        multi.id = multitask_id;
        std::copy(sub_ids.begin(), sub_ids.end(), std::inserter(multi.subtasks_remaining, multi.subtasks_remaining.end()));
        queue_multitasks.push_back(multi);
    }
    // updatethe remaining subtasks, while appending results to multitask
    void update_multitask(int multitask_id, int subtask_id, task_result& result)
    {
        std::lock_guard<std::mutex> lock(mutex_tasks);
        for (auto& multitask : queue_multitasks)
        {
            if (multitask.id == multitask_id)
            {
                multitask.subtasks_remaining.erase(subtask_id);
                multitask.results.push_back(result);
            }
        }
    }
 };
 struct llama_server_response {
    typedef std::function<void(int, int, task_result&)> callback_multitask_t;
    callback_multitask_t callback_update_multitask;
    // for keeping track of all tasks waiting for the result
    std::set<int> waiting_task_ids;     // this stores waiting tasks with no obvious limit
    // the main result queue
    std::vector<task_result> queue_results;
    std::mutex mutex_results;
    std::condition_variable condition_results;
    // add the task_id to the list of tasks waiting for response
    void add_waiting_task_id(int task_id) {
        LOG_VERBOSE("waiting for task id", {"task_id", task_id});
        std::unique_lock<std::mutex> lock(mutex_results);
        waiting_task_ids.insert(task_id);
        LOG("Waiting task list size after addition: %2zu.\n", waiting_task_ids.size());
    }
    // when the request is finished, we can remove task associated with it
    void remove_waiting_task_id(int task_id) {
        LOG_VERBOSE("remove waiting for task id", {"task_id", task_id});
        std::unique_lock<std::mutex> lock(mutex_results);
        waiting_task_ids.erase(task_id);
        LOG("Waiting task list size after removal: %zu.\n", waiting_task_ids.size());
    }
    // This function blocks the thread until there is a response for this task_id
    task_result recv(int task_id) {
        while (true)
        {
            std::unique_lock<std::mutex> lock(mutex_results);
            condition_results.wait(lock, [&]{
                return !queue_results.empty();
            });
            for (int i = 0; i < (int) queue_results.size(); i++)
            {
                if (queue_results[i].id == task_id)
                {
                    assert(queue_results[i].multitask_id == -1);
                    task_result res = queue_results[i];
                    queue_results.erase(queue_results.begin() + i);
                    return res;
                }
            }
        }
        // should never reach here
    }
    // Register the function to update multitask
    void on_multitask_update(callback_multitask_t callback) {
        callback_update_multitask = callback;
    }
    // Send a new result to a waiting task_id
    void send(task_result result) {
        std::unique_lock<std::mutex> lock(mutex_results);
        LOG_VERBOSE("send new result", {"task_id", result.id});
        for (auto& task_id : waiting_task_ids) {
            // LOG("waiting task id %i \n", task_id);
            // for now, tasks that have associated parent multitasks just get erased once multitask picks up the result
            if (result.multitask_id == task_id)
            {
                LOG_VERBOSE("callback_update_multitask", {"task_id", task_id});
                callback_update_multitask(task_id, result.id, result);
                continue;
            }
            if (result.id == task_id)
            {
                LOG_VERBOSE("queue_results.push_back", {"task_id", task_id});
                queue_results.push_back(result);
                condition_results.notify_all();
                return;
            }
        }
    }
 };
 //
 =======
 >>>>>>> origin/master
 // base64 utils (TODO: move to common in the future)
 //