Unify content + message in server_task_result_cmpl_final (+ avoid string copy)
This commit is contained in:
parent
77c60e662e
commit
d86a1ae80d
1 changed files with 12 additions and 12 deletions
|
@ -533,7 +533,7 @@ struct completion_token_output {
|
||||||
struct server_task_result_cmpl_final : server_task_result {
|
struct server_task_result_cmpl_final : server_task_result {
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
|
||||||
std::string content;
|
common_chat_msg message;
|
||||||
llama_tokens tokens;
|
llama_tokens tokens;
|
||||||
|
|
||||||
bool stream;
|
bool stream;
|
||||||
|
@ -559,7 +559,6 @@ struct server_task_result_cmpl_final : server_task_result {
|
||||||
oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE;
|
oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE;
|
||||||
std::string oaicompat_model;
|
std::string oaicompat_model;
|
||||||
std::string oaicompat_cmpl_id;
|
std::string oaicompat_cmpl_id;
|
||||||
common_chat_msg oaicompat_chat_msg;
|
|
||||||
|
|
||||||
virtual int get_index() override {
|
virtual int get_index() override {
|
||||||
return index;
|
return index;
|
||||||
|
@ -585,7 +584,7 @@ struct server_task_result_cmpl_final : server_task_result {
|
||||||
json to_json_non_oaicompat() {
|
json to_json_non_oaicompat() {
|
||||||
json res = json {
|
json res = json {
|
||||||
{"index", index},
|
{"index", index},
|
||||||
{"content", stream ? "" : content}, // in stream mode, content is already in last partial chunk
|
{"content", stream ? "" : message.content}, // in stream mode, content is already in last partial chunk
|
||||||
{"tokens", stream ? llama_tokens {} : tokens},
|
{"tokens", stream ? llama_tokens {} : tokens},
|
||||||
{"id_slot", id_slot},
|
{"id_slot", id_slot},
|
||||||
{"stop", true},
|
{"stop", true},
|
||||||
|
@ -622,7 +621,7 @@ struct server_task_result_cmpl_final : server_task_result {
|
||||||
json res = json {
|
json res = json {
|
||||||
{"choices", json::array({
|
{"choices", json::array({
|
||||||
json{
|
json{
|
||||||
{"text", stream ? "" : content}, // in stream mode, content is already in last partial chunk
|
{"text", stream ? "" : message.content}, // in stream mode, content is already in last partial chunk
|
||||||
{"index", index},
|
{"index", index},
|
||||||
{"logprobs", logprobs},
|
{"logprobs", logprobs},
|
||||||
{"finish_reason", finish_reason},
|
{"finish_reason", finish_reason},
|
||||||
|
@ -654,13 +653,13 @@ struct server_task_result_cmpl_final : server_task_result {
|
||||||
json to_json_oaicompat_chat() {
|
json to_json_oaicompat_chat() {
|
||||||
std::string finish_reason = "length";
|
std::string finish_reason = "length";
|
||||||
if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
|
if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
|
||||||
finish_reason = oaicompat_chat_msg.tool_calls.empty() ? "stop" : "tool_calls";
|
finish_reason = message.tool_calls.empty() ? "stop" : "tool_calls";
|
||||||
}
|
}
|
||||||
|
|
||||||
json tool_calls;
|
json tool_calls;
|
||||||
if (!oaicompat_chat_msg.tool_calls.empty()) {
|
if (!message.tool_calls.empty()) {
|
||||||
tool_calls = json::array();
|
tool_calls = json::array();
|
||||||
for (const auto & tc : oaicompat_chat_msg.tool_calls) {
|
for (const auto & tc : message.tool_calls) {
|
||||||
tool_calls.push_back({
|
tool_calls.push_back({
|
||||||
{"type", "function"},
|
{"type", "function"},
|
||||||
{"function", {
|
{"function", {
|
||||||
|
@ -676,7 +675,7 @@ struct server_task_result_cmpl_final : server_task_result {
|
||||||
{"finish_reason", finish_reason},
|
{"finish_reason", finish_reason},
|
||||||
{"index", 0},
|
{"index", 0},
|
||||||
{"message", json {
|
{"message", json {
|
||||||
{"content", oaicompat_chat_msg.content},
|
{"content", message.content},
|
||||||
{"tool_calls", tool_calls},
|
{"tool_calls", tool_calls},
|
||||||
{"role", "assistant"},
|
{"role", "assistant"},
|
||||||
}},
|
}},
|
||||||
|
@ -2283,7 +2282,6 @@ struct server_context {
|
||||||
res->id_slot = slot.id;
|
res->id_slot = slot.id;
|
||||||
|
|
||||||
res->index = slot.index;
|
res->index = slot.index;
|
||||||
res->content = slot.generated_text;
|
|
||||||
res->tokens = slot.generated_tokens;
|
res->tokens = slot.generated_tokens;
|
||||||
res->timings = slot.get_timings();
|
res->timings = slot.get_timings();
|
||||||
res->prompt = common_detokenize(ctx, slot.prompt_tokens, true);
|
res->prompt = common_detokenize(ctx, slot.prompt_tokens, true);
|
||||||
|
@ -2304,11 +2302,11 @@ struct server_context {
|
||||||
res->oaicompat_model = slot.params.oaicompat_model;
|
res->oaicompat_model = slot.params.oaicompat_model;
|
||||||
res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
|
res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
|
||||||
if (slot.params.chat_parser) {
|
if (slot.params.chat_parser) {
|
||||||
res->oaicompat_chat_msg = slot.params.chat_parser(slot.generated_text);
|
res->message = slot.params.chat_parser(slot.generated_text);
|
||||||
} else {
|
} else {
|
||||||
res->oaicompat_chat_msg = {
|
res->message = {
|
||||||
/* .role = */ "assistant",
|
/* .role = */ "assistant",
|
||||||
/* .content = */ slot.generated_text,
|
/* .content = */ std::move(slot.generated_text),
|
||||||
/* .tool_calls = */ {}
|
/* .tool_calls = */ {}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -3838,6 +3836,8 @@ int main(int argc, char ** argv) {
|
||||||
// OAI-compat
|
// OAI-compat
|
||||||
task.params.oaicompat = oaicompat;
|
task.params.oaicompat = oaicompat;
|
||||||
task.params.oaicompat_cmpl_id = completion_id;
|
task.params.oaicompat_cmpl_id = completion_id;
|
||||||
|
|
||||||
|
// Grammar & tool-calls
|
||||||
task.params.sampling.grammar = chat_params.grammar;
|
task.params.sampling.grammar = chat_params.grammar;
|
||||||
task.params.sampling.grammar_lazy = chat_params.grammar_lazy;
|
task.params.sampling.grammar_lazy = chat_params.grammar_lazy;
|
||||||
for (const auto & trigger : chat_params.grammar_triggers) {
|
for (const auto & trigger : chat_params.grammar_triggers) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue