persist completion_id only for same stream
This commit is contained in:
parent
c5efd837b6
commit
aa95dc5568
2 changed files with 6 additions and 6 deletions
|
@@ -69,7 +69,7 @@ inline static json oaicompat_completion_params_parse(
|
||||||
return llama_params;
|
return llama_params;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static json format_final_response_oaicompat(const json &request, const task_result &response, std::string id, bool streaming = false)
|
inline static json format_final_response_oaicompat(const json &request, const task_result &response, bool streaming = false)
|
||||||
{
|
{
|
||||||
json result = response.result_json;
|
json result = response.result_json;
|
||||||
|
|
||||||
|
@@ -105,7 +105,7 @@ inline static json format_final_response_oaicompat(const json &request, const ta
|
||||||
json{{"completion_tokens", num_tokens_predicted},
|
json{{"completion_tokens", num_tokens_predicted},
|
||||||
{"prompt_tokens", num_prompt_tokens},
|
{"prompt_tokens", num_prompt_tokens},
|
||||||
{"total_tokens", num_tokens_predicted + num_prompt_tokens}}},
|
{"total_tokens", num_tokens_predicted + num_prompt_tokens}}},
|
||||||
{"id", id}};
|
{"id", gen_chatcmplid()}};
|
||||||
|
|
||||||
if (server_verbose) {
|
if (server_verbose) {
|
||||||
res["__verbose"] = result;
|
res["__verbose"] = result;
|
||||||
|
|
|
@@ -3210,8 +3210,7 @@ int main(int argc, char **argv)
|
||||||
res.set_content(models.dump(), "application/json; charset=utf-8");
|
res.set_content(models.dump(), "application/json; charset=utf-8");
|
||||||
});
|
});
|
||||||
|
|
||||||
const std::string completion_id = gen_chatcmplid();
|
const auto chat_completions = [&llama, &validate_api_key, &sparams](const httplib::Request &req, httplib::Response &res)
|
||||||
const auto chat_completions = [&llama, &validate_api_key, &sparams, &completion_id](const httplib::Request &req, httplib::Response &res)
|
|
||||||
{
|
{
|
||||||
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
|
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
|
||||||
if (!validate_api_key(req, res)) {
|
if (!validate_api_key(req, res)) {
|
||||||
|
@@ -3228,7 +3227,7 @@ int main(int argc, char **argv)
|
||||||
task_result result = llama.queue_results.recv(task_id);
|
task_result result = llama.queue_results.recv(task_id);
|
||||||
|
|
||||||
if (!result.error && result.stop) {
|
if (!result.error && result.stop) {
|
||||||
json oaicompat_result = format_final_response_oaicompat(data, result, completion_id);
|
json oaicompat_result = format_final_response_oaicompat(data, result);
|
||||||
|
|
||||||
res.set_content(oaicompat_result.dump(-1, ' ', false,
|
res.set_content(oaicompat_result.dump(-1, ' ', false,
|
||||||
json::error_handler_t::replace),
|
json::error_handler_t::replace),
|
||||||
|
@@ -3239,7 +3238,8 @@ int main(int argc, char **argv)
|
||||||
}
|
}
|
||||||
llama.queue_results.remove_waiting_task_id(task_id);
|
llama.queue_results.remove_waiting_task_id(task_id);
|
||||||
} else {
|
} else {
|
||||||
const auto chunked_content_provider = [task_id, &llama, &completion_id](size_t, httplib::DataSink &sink) {
|
const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink &sink) {
|
||||||
|
const std::string completion_id = gen_chatcmplid();
|
||||||
while (true) {
|
while (true) {
|
||||||
task_result llama_result = llama.queue_results.recv(task_id);
|
task_result llama_result = llama.queue_results.recv(task_id);
|
||||||
if (!llama_result.error) {
|
if (!llama_result.error) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue