fix deadlock

parent de473f5f8e
commit 5805fdaae2

1 changed file with 701 additions and 668 deletions
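
What changed: most hunks below are a mechanical style change that splits `} else {` into a closing brace and an `else` on its own line; the behavioral fix is confined to the hunks between source lines 1350 and 1641, which reorder work around two mutexes. Before this commit, the path that finalizes a slot's result pushed into queue_results while holding the results lock and then called update_multi_task(), which needs the lock guarding the multitask queue; meanwhile the multitask loop held that task-queue lock and took mutex_results to publish aggregate results. Two threads acquiring the same pair of mutexes in opposite orders is a classic AB-BA deadlock. A minimal self-contained sketch of the pre-fix shape (mutex_results and update_multi_task are named in the diff; mutex_tasks is an assumed name for the task-queue mutex, which the diff only shows being unlocked):

#include <mutex>

std::mutex mutex_results; // guards queue_results (name from the diff)
std::mutex mutex_tasks;   // assumed name for the multitask-queue mutex

// Stand-in for update_multi_task(): recording a subtask result on its
// parent multitask requires the task-queue mutex.
void update_multi_task_stub() {
    std::lock_guard<std::mutex> tasks(mutex_tasks);
    // ... attach the result to the parent multitask ...
}

// Thread A, pre-fix: finalize a result while still holding mutex_results.
void send_result_pre_fix() {
    std::unique_lock<std::mutex> results(mutex_results);
    // ... queue_results.push_back(res); condition_results.notify_all(); ...
    update_multi_task_stub(); // blocks if thread B holds mutex_tasks
}

// Thread B, pre-fix: aggregate multitasks while holding the task-queue mutex.
void process_multitasks_pre_fix() {
    std::unique_lock<std::mutex> tasks(mutex_tasks);
    std::lock_guard<std::mutex> results(mutex_results); // blocks if A holds it
    // ... queue_results.push_back(aggregate_result); ...
}

The commit breaks the cycle from both sides so that neither thread ever holds both mutexes at once, as the hunks below show.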

@@ -724,7 +724,8 @@ struct llama_server_context
         if (data.count("__oaicompat") != 0) {
             slot->oaicompat = true;
             slot->oaicompat_model = json_value(data, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
-        } else {
+        }
+        else {
             slot->oaicompat = false;
             slot->oaicompat_model = "";
         }

@@ -913,7 +914,8 @@ struct llama_server_context
                     slot->images.clear();
                     return false;
                 }
-            } catch (const std::invalid_argument& e) {
+            }
+            catch (const std::invalid_argument& e) {
                 LOG_TEE("Invalid image number id in prompt\n");
                 slot->images.clear();
                 return false;

@@ -1350,14 +1352,17 @@ struct llama_server_context
             res.result_json["model"] = slot.oaicompat_model;
         }

+        queue_results.push_back(res);
+        condition_results.notify_all();
+
+        // done with results, unlock
+        lock.unlock();
+
         // parent multitask, if any, needs to be updated
         if (slot.multitask_id != -1)
         {
             update_multi_task(slot.multitask_id, slot.task_id, res);
         }
-
-        queue_results.push_back(res);
-        condition_results.notify_all();
     }

     void send_embedding(llama_client_slot& slot)
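
Note the effect of the reordering above: the result is now published and mutex_results released before update_multi_task() runs, so this thread no longer waits for the task-queue mutex while holding the results mutex. The removed queue_results.push_back(res) / condition_results.notify_all() pair at the bottom is the old publish site being deleted, not a second publish.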

@@ -1603,6 +1608,7 @@ struct llama_server_context
         }

         // remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue
+        std::vector<task_result> agg_results;
         auto queue_iterator = queue_multitasks.begin();
         while (queue_iterator != queue_multitasks.end())
         {

@@ -1623,8 +1629,9 @@ struct llama_server_context
                 }
                 aggregate_result.result_json = json{ "results", result_jsons };

-                std::lock_guard<std::mutex> lock(mutex_results);
-                queue_results.push_back(aggregate_result);
+
+                agg_results.push_back(aggregate_result);
+
                 condition_results.notify_all();

                 queue_iterator = queue_multitasks.erase(queue_iterator);

@@ -1634,6 +1641,13 @@ struct llama_server_context
                 ++queue_iterator;
             }
         }
+
+        // done with tasks, unlock
+        lock.unlock();
+
+        // copy aggregate results of complete multi-tasks to the results queue
+        std::lock_guard<std::mutex> lock_results(mutex_results);
+        queue_results.insert(queue_results.end(), agg_results.begin(), agg_results.end());
     }

     bool update_slots() {
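
This hunk, with the two above it, is the other half of the fix: instead of pushing each aggregate into queue_results while still inside the multitask loop (and thus while still holding the task-queue lock), finished aggregates are staged in the local agg_results vector, the task-queue lock is released, and only then is mutex_results taken to publish everything. A condensed, self-contained sketch of that collect-then-publish pattern (the stand-in types and queues simplify the real task_result and queue structures):

#include <mutex>
#include <vector>

struct task_result { int id; };

std::mutex mutex_tasks;            // guards pending (assumed role)
std::mutex mutex_results;          // guards published
std::vector<task_result> pending;  // stand-in for queue_multitasks
std::vector<task_result> published; // stand-in for queue_results

void drain_finished_tasks() {
    std::vector<task_result> agg_results; // local staging buffer

    {
        std::unique_lock<std::mutex> lock(mutex_tasks);
        // collect finished tasks while holding only the tasks mutex
        for (auto it = pending.begin(); it != pending.end(); ) {
            agg_results.push_back(*it); // stage locally
            it = pending.erase(it);
        }
    } // tasks mutex released here (the diff uses lock.unlock())

    // publish in one shot while holding only the results mutex
    std::lock_guard<std::mutex> lock_results(mutex_results);
    published.insert(published.end(), agg_results.begin(), agg_results.end());
}

Because each critical section now holds exactly one of the two mutexes, no lock-order cycle with the response path can form.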

@@ -2407,7 +2421,8 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                 break;
             }
             params.n_parallel = std::stoi(argv[i]);
-        } else if (arg == "-n" || arg == "--n-predict")
+        }
+        else if (arg == "-n" || arg == "--n-predict")
         {
             if (++i >= argc)
             {

@@ -2415,7 +2430,8 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                 break;
             }
             params.n_predict = std::stoi(argv[i]);
-        } else if (arg == "-spf" || arg == "--system-prompt-file")
+        }
+        else if (arg == "-spf" || arg == "--system-prompt-file")
         {
             if (++i >= argc)
             {

@@ -2470,23 +2486,28 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                 sep += 4;
                 kvo.tag = LLAMA_KV_OVERRIDE_INT;
                 kvo.int_value = std::atol(sep);
-            } else if (strncmp(sep, "float:", 6) == 0) {
+            }
+            else if (strncmp(sep, "float:", 6) == 0) {
                 sep += 6;
                 kvo.tag = LLAMA_KV_OVERRIDE_FLOAT;
                 kvo.float_value = std::atof(sep);
-            } else if (strncmp(sep, "bool:", 5) == 0) {
+            }
+            else if (strncmp(sep, "bool:", 5) == 0) {
                 sep += 5;
                 kvo.tag = LLAMA_KV_OVERRIDE_BOOL;
                 if (std::strcmp(sep, "true") == 0) {
                     kvo.bool_value = true;
-                } else if (std::strcmp(sep, "false") == 0) {
+                }
+                else if (std::strcmp(sep, "false") == 0) {
                     kvo.bool_value = false;
-                } else {
+                }
+                else {
                     fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
                     invalid_param = true;
                     break;
                 }
-            } else {
+            }
+            else {
                 fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
                 invalid_param = true;
                 break;
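
For context on what feeds this parser: these branches handle the value part of a key=type:value model-metadata override supplied on the command line (llama.cpp's --override-kv flag), where the int:, float:, and bool: prefixes select the LLAMA_KV_OVERRIDE_* tag. A hypothetical standalone helper showing the same dispatch, kept in the commit's brace style:

#include <cstdio>
#include <cstdlib>
#include <cstring>

// Hypothetical helper, not part of the commit: `payload` is the text after
// '=' in a key=type:value override, e.g. "int:42" or "bool:true".
bool parse_kv_payload(const char* payload) {
    if (std::strncmp(payload, "int:", 4) == 0) {
        std::printf("int override: %ld\n", std::atol(payload + 4));
    }
    else if (std::strncmp(payload, "float:", 6) == 0) {
        std::printf("float override: %f\n", std::atof(payload + 6));
    }
    else if (std::strncmp(payload, "bool:", 5) == 0) {
        const char* v = payload + 5;
        if (std::strcmp(v, "true") != 0 && std::strcmp(v, "false") != 0) {
            std::fprintf(stderr, "error: invalid boolean value: %s\n", v);
            return false;
        }
        std::printf("bool override: %s\n", v);
    }
    else {
        std::fprintf(stderr, "error: invalid type prefix: %s\n", payload);
        return false;
    }
    return true;
}

A matching invocation would be something like --override-kv tokenizer.ggml.add_bos_token=bool:false; the key here is only an example.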

@@ -2596,7 +2617,8 @@ json oaicompat_completion_params_parse(
     // Handle 'stop' field
     if (body.contains("stop") && body["stop"].is_string()) {
         llama_params["stop"] = json::array({ body["stop"].get<std::string>() });
-    } else {
+    }
+    else {
         llama_params["stop"] = json_value(body, "stop", json::array());
     }


@@ -2687,13 +2709,15 @@ static std::vector<json> format_partial_response_oaicompat(const task_result &re
         choices = json::array({ json{{"finish_reason", finish_reason},
                                      {"index", 0},
                                      {"delta", json::object()}} });
-    } else {
+    }
+    else {
         if (first) {
             if (content.empty()) {
                 choices = json::array({ json{{"finish_reason", nullptr},
                                              {"index", 0},
                                              {"delta", json{{"role", "assistant"}}}} });
-            } else {
+            }
+            else {
                 // We have to send this as two updates to conform to openai behavior
                 json initial_ret = json{ {"choices", json::array({json{
                                          {"finish_reason", nullptr},

@@ -2719,7 +2743,8 @@ static std::vector<json> format_partial_response_oaicompat(const task_result &re

                 return std::vector<json>({ initial_ret, second_ret });
             }
-        } else {
+        }
+        else {
             // Some idiosyncrasy in task processing logic makes several trailing calls
             // with empty content, we ignore these at the calee site.
             if (content.empty()) {
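
The "two updates" mentioned in the comment are OpenAI's streaming convention: the first chat.completion.chunk delta carries only the assistant role, and the generated text starts in the following chunk. A sketch of just the two choices payloads using nlohmann::json, the library this file already uses (the surrounding id/object/created/model fields are omitted, and the content string is illustrative):

#include <nlohmann/json.hpp>
using json = nlohmann::json;

// First update: announce the assistant role; the delta carries no content.
json initial_ret = json{{"choices", json::array({json{
    {"finish_reason", nullptr},
    {"index", 0},
    {"delta", json{{"role", "assistant"}}}
}})}};

// Second update: the first piece of generated text ("Hello" is illustrative).
json second_ret = json{{"choices", json::array({json{
    {"finish_reason", nullptr},
    {"index", 0},
    {"delta", json{{"content", "Hello"}}}
}})}};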

@@ -2942,7 +2967,8 @@ int main(int argc, char **argv)

     if (sparams.api_keys.size() == 1) {
         log_data["api_key"] = "api_key: ****" + sparams.api_keys[0].substr(sparams.api_keys[0].length() - 4);
-    } else if (sparams.api_keys.size() > 1) {
+    }
+    else if (sparams.api_keys.size() > 1) {
         log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
     }


@@ -2964,7 +2990,8 @@ int main(int argc, char **argv)
     {
         state.store(SERVER_STATE_ERROR);
         return 1;
-    } else {
+    }
+    else {
         llama.initialize();
         state.store(SERVER_STATE_READY);
         LOG_INFO("model loaded", {});

@@ -3054,7 +3081,8 @@ int main(int argc, char **argv)
             res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
             return;
         }
-    } else {
+    }
+    else {
         const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink& sink)
         {
             while (true)

@@ -3075,7 +3103,8 @@ int main(int argc, char **argv)
                 if (result.stop) {
                     break;
                 }
-            } else {
+            }
+            else {
                 const std::string str =
                     "error: " +
                     result.result_json.dump(-1, ' ', false, json::error_handler_t::replace) +

@@ -3146,12 +3175,14 @@ int main(int argc, char **argv)
             res.set_content(oaicompat_result.dump(-1, ' ', false,
                                 json::error_handler_t::replace),
                             "application/json; charset=utf-8");
-        } else {
+        }
+        else {
             res.status = 500;
             res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
             return;
         }
-    } else {
+    }
+    else {
         const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink& sink) {
             while (true) {
                 task_result llama_result = llama.next_result(task_id);

@@ -3174,7 +3205,8 @@ int main(int argc, char **argv)
                 if (llama_result.stop) {
                     break;
                 }
-            } else {
+            }
+            else {
                 const std::string str =
                     "error: " +
                     llama_result.result_json.dump(-1, ' ', false,

@@ -3221,7 +3253,8 @@ int main(int argc, char **argv)
             res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
             return;
         }
-    } else {
+    }
+    else {
        const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink& sink) {
            while (true)
            {