server : code style

2024-03-05 15:36:14 +02:00 · 2024-03-05 15:36:14 +02:00 · fef64c587d
commit fef64c587d
parent ad1d746caa
3 changed files with 737 additions and 940 deletions
--- a/examples/server/oai.hpp
+++ b/examples/server/oai.hpp
@ -12,9 +12,8 @@ using json = nlohmann::json;

 inline static json oaicompat_completion_params_parse(
    const struct llama_model * model,
-    const json &body, /* openai api json semantics */
-    const std::string &chat_template)
-{
+    const json & body, /* openai api json semantics */
+    const std::string & chat_template) {
    json llama_params;

    llama_params["__oaicompat"] = true;
@ -34,7 +33,7 @@ inline static json oaicompat_completion_params_parse(
    llama_params["top_k"]             = json_value(body,   "top_k",             default_sparams.top_k);
    llama_params["top_p"]             = json_value(body,   "top_p",             1.0);
    llama_params["n_predict"]         = json_value(body,   "max_tokens",        -1);
-    llama_params["logit_bias"]        = json_value(body, "logit_bias",json::object());
+    llama_params["logit_bias"]        = json_value(body,   "logit_bias",        json::object());
    llama_params["frequency_penalty"] = json_value(body,   "frequency_penalty", 0.0);
    llama_params["presence_penalty"]  = json_value(body,   "presence_penalty",  0.0);
    llama_params["seed"]              = json_value(body,   "seed",              LLAMA_DEFAULT_SEED);
@ -65,8 +64,7 @@ inline static json oaicompat_completion_params_parse(
    return llama_params;
 }

-inline static json format_final_response_oaicompat(const json &request, const task_result &response, bool streaming = false)
-{
+inline static json format_final_response_oaicompat(const json & request, const task_result & response, bool streaming = false) {
    json result = response.result_json;

    bool stopped_word        = result.count("stopped_word") != 0;
@ -91,17 +89,19 @@ inline static json format_final_response_oaicompat(const json &request, const ta

    std::time_t t = std::time(0);

-    json res =
-        json{{"choices", choices},
+    json res = json {
+        {"choices", choices},
        {"created", t},
        {"model",
            json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
        {"object", streaming ? "chat.completion.chunk" : "chat.completion"},
-            {"usage",
-                json{{"completion_tokens", num_tokens_predicted},
+        {"usage", json {
+            {"completion_tokens", num_tokens_predicted},
            {"prompt_tokens",     num_prompt_tokens},
-                     {"total_tokens",      num_tokens_predicted + num_prompt_tokens}}},
-            {"id", gen_chatcmplid()}};
+            {"total_tokens",      num_tokens_predicted + num_prompt_tokens}
+        }},
+        {"id", gen_chatcmplid()}
+    };

    if (server_verbose) {
        res["__verbose"] = result;
@ -196,26 +196,28 @@ inline static std::vector<json> format_partial_response_oaicompat(const task_res
        }
    }

-    json ret = json{{"choices", choices},
+    json ret = json {
+        {"choices", choices},
        {"created", t},
        {"id",      gen_chatcmplid()},
        {"model",   modelname},
-                    {"object", "chat.completion.chunk"}};
+        {"object",  "chat.completion.chunk"}
+    };

    return std::vector<json>({ret});
 }

-inline static json format_embeddings_response_oaicompat(const json &request, const json &embeddings)
-{
-    json res =
-        json{
+inline static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
+    json res = json {
        {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
        {"object", "list"},
-            {"usage",
-                json{{"prompt_tokens", 0},
-                     {"total_tokens", 0}}},
+        {"usage", json {
+            {"prompt_tokens", 0},
+            {"total_tokens", 0}
+        }},
        {"data", embeddings}
    };
+
    return res;
 }

--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@ -58,8 +58,8 @@ struct task_server {
    task_type type;
    json data;

-    bool infill_mode    = false;
-    bool embedding_mode = false;
+    bool infill    = false;
+    bool embedding = false;
 };

 struct task_result {
@ -187,7 +187,8 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
        res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
    }

-    std::string formatted_chat(buf.data(), res);
+    const std::string formatted_chat(buf.data(), res);
+
    LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}});

    return formatted_chat;
@ -201,16 +202,17 @@ struct llama_server_queue {
    int id = 0;
    bool running;

-    std::mutex mutex_tasks;
-
    // queues
    std::vector<task_server> queue_tasks;
    std::vector<task_server> queue_tasks_deferred;
    std::vector<task_multi>  queue_multitasks;
+
+    std::mutex mutex_tasks;
    std::condition_variable condition_tasks;
+
    // callback functions
-    std::function<void(task_server&)> callback_new_task;
-    std::function<void(task_multi&)> callback_finish_multitask;
+    std::function<void(task_server &)> callback_new_task;
+    std::function<void(task_multi &)>  callback_finish_multitask;
    std::function<void(void)>          callback_run_slots;

    // Add a new task to the end of the queue
@ -265,10 +267,9 @@ struct llama_server_queue {
    }

    // end the start_loop routine
-    void terminate() { {
+    void terminate() {
        std::unique_lock<std::mutex> lock(mutex_tasks);
        running = false;
-        }
        condition_tasks.notify_all();
    }

@ -350,14 +351,11 @@ struct llama_server_queue {
    }

    // update the remaining subtasks, while appending results to multitask
-    void update_multitask(int id_multi, int subtask_id, task_result& result)
-    {
+    void update_multitask(int id_multi, int id_sub, task_result& result) {
        std::lock_guard<std::mutex> lock(mutex_tasks);
-        for (auto& multitask : queue_multitasks)
-        {
-            if (multitask.id == id_multi)
-            {
-                multitask.subtasks_remaining.erase(subtask_id);
+        for (auto & multitask : queue_multitasks) {
+            if (multitask.id == id_multi) {
+                multitask.subtasks_remaining.erase(id_sub);
                multitask.results.push_back(result);
            }
        }
@ -468,13 +466,10 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str

    std::vector<uint8_t> ret;

-    while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_]))
-    {
+    while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
        char_array_4[i++] = encoded_string[in_]; in_++;
-        if (i == 4)
-        {
-            for (i = 0; i <4; i++)
-            {
+        if (i == 4) {
+            for (i = 0; i < 4; i++) {
                char_array_4[i] = base64_chars.find(char_array_4[i]);
            }

@ -482,23 +477,20 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
            char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
            char_array_3[2] = ((char_array_4[2] & 0x3) << 6) +   char_array_4[3];

-            for (i = 0; (i < 3); i++)
-            {
+            for (i = 0; (i < 3); i++) {
                ret.push_back(char_array_3[i]);
            }
+
            i = 0;
        }
    }

-    if (i)
-    {
-        for (j = i; j <4; j++)
-        {
+    if (i) {
+        for (j = i; j < 4; j++) {
            char_array_4[j] = 0;
        }

-        for (j = 0; j <4; j++)
-        {
+        for (j = 0; j < 4; j++) {
            char_array_4[j] = base64_chars.find(char_array_4[j]);
        }

@ -506,8 +498,7 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
        char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
        char_array_3[2] = ((char_array_4[2] & 0x3) << 6) +   char_array_4[3];

-        for (j = 0; (j < i - 1); j++)
-        {
+        for (j = 0; j < i - 1; j++) {
            ret.push_back(char_array_3[j]);
        }
    }
@ -586,6 +577,7 @@ static std::string tokens_to_str(llama_context * ctx, Iter begin, Iter end) {
 // format incomplete utf-8 multibyte character for output
 static std::string tokens_to_output_formatted_string(const llama_context * ctx, const llama_token token) {
    std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
+
    // if the size is 1 and first bit is 1, meaning it's a partial character
    //   (size > 1 meaning it's already a known token)
    if (out.size() == 1 && (out[0] & 0x80) == 0x80) {
@ -601,6 +593,7 @@ static std::string tokens_to_output_formatted_string(const llama_context * ctx,
 // convert a vector of completion_token_output to json
 static json probs_vector_to_json(const llama_context * ctx, const std::vector<completion_token_output> & probs) {
    json out = json::array();
+
    for (const auto & prob : probs) {
        json probs_for_token = json::array();