* fix deadlock

This commit is contained in:
ziadb 2024-01-12 20:31:48 -05:00
parent de473f5f8e
commit 5805fdaae2

View file

@ -724,7 +724,8 @@ struct llama_server_context
if (data.count("__oaicompat") != 0) { if (data.count("__oaicompat") != 0) {
slot->oaicompat = true; slot->oaicompat = true;
slot->oaicompat_model = json_value(data, "model", std::string(DEFAULT_OAICOMPAT_MODEL)); slot->oaicompat_model = json_value(data, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
} else { }
else {
slot->oaicompat = false; slot->oaicompat = false;
slot->oaicompat_model = ""; slot->oaicompat_model = "";
} }
@ -913,7 +914,8 @@ struct llama_server_context
slot->images.clear(); slot->images.clear();
return false; return false;
} }
} catch (const std::invalid_argument& e) { }
catch (const std::invalid_argument& e) {
LOG_TEE("Invalid image number id in prompt\n"); LOG_TEE("Invalid image number id in prompt\n");
slot->images.clear(); slot->images.clear();
return false; return false;
@ -1350,14 +1352,17 @@ struct llama_server_context
res.result_json["model"] = slot.oaicompat_model; res.result_json["model"] = slot.oaicompat_model;
} }
queue_results.push_back(res);
condition_results.notify_all();
// done with results, unlock
lock.unlock();
// parent multitask, if any, needs to be updated // parent multitask, if any, needs to be updated
if (slot.multitask_id != -1) if (slot.multitask_id != -1)
{ {
update_multi_task(slot.multitask_id, slot.task_id, res); update_multi_task(slot.multitask_id, slot.task_id, res);
} }
queue_results.push_back(res);
condition_results.notify_all();
} }
void send_embedding(llama_client_slot& slot) void send_embedding(llama_client_slot& slot)
@ -1603,6 +1608,7 @@ struct llama_server_context
} }
// remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue // remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue
std::vector<task_result> agg_results;
auto queue_iterator = queue_multitasks.begin(); auto queue_iterator = queue_multitasks.begin();
while (queue_iterator != queue_multitasks.end()) while (queue_iterator != queue_multitasks.end())
{ {
@ -1623,8 +1629,9 @@ struct llama_server_context
} }
aggregate_result.result_json = json{ "results", result_jsons }; aggregate_result.result_json = json{ "results", result_jsons };
std::lock_guard<std::mutex> lock(mutex_results);
queue_results.push_back(aggregate_result); agg_results.push_back(aggregate_result);
condition_results.notify_all(); condition_results.notify_all();
queue_iterator = queue_multitasks.erase(queue_iterator); queue_iterator = queue_multitasks.erase(queue_iterator);
@ -1634,6 +1641,13 @@ struct llama_server_context
++queue_iterator; ++queue_iterator;
} }
} }
// done with tasks, unlock
lock.unlock();
// copy aggregate results of complete multi-tasks to the results queue
std::lock_guard<std::mutex> lock_results(mutex_results);
queue_results.insert(queue_results.end(), agg_results.begin(), agg_results.end());
} }
bool update_slots() { bool update_slots() {
@ -2407,7 +2421,8 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
break; break;
} }
params.n_parallel = std::stoi(argv[i]); params.n_parallel = std::stoi(argv[i]);
} else if (arg == "-n" || arg == "--n-predict") }
else if (arg == "-n" || arg == "--n-predict")
{ {
if (++i >= argc) if (++i >= argc)
{ {
@ -2415,7 +2430,8 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
break; break;
} }
params.n_predict = std::stoi(argv[i]); params.n_predict = std::stoi(argv[i]);
} else if (arg == "-spf" || arg == "--system-prompt-file") }
else if (arg == "-spf" || arg == "--system-prompt-file")
{ {
if (++i >= argc) if (++i >= argc)
{ {
@ -2470,23 +2486,28 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
sep += 4; sep += 4;
kvo.tag = LLAMA_KV_OVERRIDE_INT; kvo.tag = LLAMA_KV_OVERRIDE_INT;
kvo.int_value = std::atol(sep); kvo.int_value = std::atol(sep);
} else if (strncmp(sep, "float:", 6) == 0) { }
else if (strncmp(sep, "float:", 6) == 0) {
sep += 6; sep += 6;
kvo.tag = LLAMA_KV_OVERRIDE_FLOAT; kvo.tag = LLAMA_KV_OVERRIDE_FLOAT;
kvo.float_value = std::atof(sep); kvo.float_value = std::atof(sep);
} else if (strncmp(sep, "bool:", 5) == 0) { }
else if (strncmp(sep, "bool:", 5) == 0) {
sep += 5; sep += 5;
kvo.tag = LLAMA_KV_OVERRIDE_BOOL; kvo.tag = LLAMA_KV_OVERRIDE_BOOL;
if (std::strcmp(sep, "true") == 0) { if (std::strcmp(sep, "true") == 0) {
kvo.bool_value = true; kvo.bool_value = true;
} else if (std::strcmp(sep, "false") == 0) { }
else if (std::strcmp(sep, "false") == 0) {
kvo.bool_value = false; kvo.bool_value = false;
} else { }
else {
fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]); fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
invalid_param = true; invalid_param = true;
break; break;
} }
} else { }
else {
fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]); fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
invalid_param = true; invalid_param = true;
break; break;
@ -2596,7 +2617,8 @@ json oaicompat_completion_params_parse(
// Handle 'stop' field // Handle 'stop' field
if (body.contains("stop") && body["stop"].is_string()) { if (body.contains("stop") && body["stop"].is_string()) {
llama_params["stop"] = json::array({ body["stop"].get<std::string>() }); llama_params["stop"] = json::array({ body["stop"].get<std::string>() });
} else { }
else {
llama_params["stop"] = json_value(body, "stop", json::array()); llama_params["stop"] = json_value(body, "stop", json::array());
} }
@ -2687,13 +2709,15 @@ static std::vector<json> format_partial_response_oaicompat(const task_result &re
choices = json::array({ json{{"finish_reason", finish_reason}, choices = json::array({ json{{"finish_reason", finish_reason},
{"index", 0}, {"index", 0},
{"delta", json::object()}} }); {"delta", json::object()}} });
} else { }
else {
if (first) { if (first) {
if (content.empty()) { if (content.empty()) {
choices = json::array({ json{{"finish_reason", nullptr}, choices = json::array({ json{{"finish_reason", nullptr},
{"index", 0}, {"index", 0},
{"delta", json{{"role", "assistant"}}}} }); {"delta", json{{"role", "assistant"}}}} });
} else { }
else {
// We have to send this as two updates to conform to openai behavior // We have to send this as two updates to conform to openai behavior
json initial_ret = json{ {"choices", json::array({json{ json initial_ret = json{ {"choices", json::array({json{
{"finish_reason", nullptr}, {"finish_reason", nullptr},
@ -2719,7 +2743,8 @@ static std::vector<json> format_partial_response_oaicompat(const task_result &re
return std::vector<json>({ initial_ret, second_ret }); return std::vector<json>({ initial_ret, second_ret });
} }
} else { }
else {
// Some idiosyncrasy in task processing logic makes several trailing calls // Some idiosyncrasy in task processing logic makes several trailing calls
// with empty content, we ignore these at the callee site. // with empty content, we ignore these at the callee site.
if (content.empty()) { if (content.empty()) {
@ -2942,7 +2967,8 @@ int main(int argc, char **argv)
if (sparams.api_keys.size() == 1) { if (sparams.api_keys.size() == 1) {
log_data["api_key"] = "api_key: ****" + sparams.api_keys[0].substr(sparams.api_keys[0].length() - 4); log_data["api_key"] = "api_key: ****" + sparams.api_keys[0].substr(sparams.api_keys[0].length() - 4);
} else if (sparams.api_keys.size() > 1) { }
else if (sparams.api_keys.size() > 1) {
log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
} }
@ -2964,7 +2990,8 @@ int main(int argc, char **argv)
{ {
state.store(SERVER_STATE_ERROR); state.store(SERVER_STATE_ERROR);
return 1; return 1;
} else { }
else {
llama.initialize(); llama.initialize();
state.store(SERVER_STATE_READY); state.store(SERVER_STATE_READY);
LOG_INFO("model loaded", {}); LOG_INFO("model loaded", {});
@ -3054,7 +3081,8 @@ int main(int argc, char **argv)
res.set_content(result.result_json["content"], "text/plain; charset=utf-8"); res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
return; return;
} }
} else { }
else {
const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink& sink) const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink& sink)
{ {
while (true) while (true)
@ -3075,7 +3103,8 @@ int main(int argc, char **argv)
if (result.stop) { if (result.stop) {
break; break;
} }
} else { }
else {
const std::string str = const std::string str =
"error: " + "error: " +
result.result_json.dump(-1, ' ', false, json::error_handler_t::replace) + result.result_json.dump(-1, ' ', false, json::error_handler_t::replace) +
@ -3146,12 +3175,14 @@ int main(int argc, char **argv)
res.set_content(oaicompat_result.dump(-1, ' ', false, res.set_content(oaicompat_result.dump(-1, ' ', false,
json::error_handler_t::replace), json::error_handler_t::replace),
"application/json; charset=utf-8"); "application/json; charset=utf-8");
} else { }
else {
res.status = 500; res.status = 500;
res.set_content(result.result_json["content"], "text/plain; charset=utf-8"); res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
return; return;
} }
} else { }
else {
const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink& sink) { const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink& sink) {
while (true) { while (true) {
task_result llama_result = llama.next_result(task_id); task_result llama_result = llama.next_result(task_id);
@ -3174,7 +3205,8 @@ int main(int argc, char **argv)
if (llama_result.stop) { if (llama_result.stop) {
break; break;
} }
} else { }
else {
const std::string str = const std::string str =
"error: " + "error: " +
llama_result.result_json.dump(-1, ' ', false, llama_result.result_json.dump(-1, ' ', false,
@ -3221,7 +3253,8 @@ int main(int argc, char **argv)
res.set_content(result.result_json["content"], "text/plain; charset=utf-8"); res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
return; return;
} }
} else { }
else {
const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink& sink) { const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink& sink) {
while (true) while (true)
{ {