refactor
This commit is contained in:
parent
5f06d37baf
commit
d25cd7f9e4
2 changed files with 42 additions and 47 deletions
|
@ -3069,9 +3069,14 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
json body = json::parse(req.body);
|
||||
|
||||
if (body.contains("tools") && ctx_server.tool_format != LLAMA_TOOL_FORMAT_NOT_SUPPORTED) {
|
||||
if (body.contains("tools")) {
|
||||
if (ctx_server.tool_format != LLAMA_TOOL_FORMAT_NOT_SUPPORTED) {
|
||||
body["prompt"] = format_chat_with_tool(ctx_server.tool_format, body.at("messages"), body.at("tools"));
|
||||
body.erase(body.find("tools"));
|
||||
} else {
|
||||
res_error(res, format_error_response("This server does not support tool calls. Start it with `--tool-calls`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
json data = oaicompat_completion_params_parse(ctx_server.model, body, params.chat_template);
|
||||
|
|
|
@ -499,7 +499,15 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
|
|||
finish_reason = "length";
|
||||
}
|
||||
|
||||
std::time_t t = std::time(0);
|
||||
// Builds the envelope for one streamed chunk around the given `choices` value:
// adds "created", "id", "model" and the fixed "object" tag "chat.completion.chunk".
// `completion_id` and `modelname` are captured by reference from the enclosing function.
// Note: "created" is taken from std::time(0) on every call, so chunks built by
// separate calls each carry the time of their own call.
auto wrap_choices = [&completion_id, &modelname](json choices) -> json {
|
||||
return json{
|
||||
{"choices", choices},
|
||||
{"created", std::time(0)},
|
||||
{"id", completion_id},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}
|
||||
};
|
||||
};
|
||||
|
||||
json choices;
|
||||
json delta = has_tool_calls
|
||||
|
@ -519,22 +527,14 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
|
|||
{"delta", json::object()}}});
|
||||
if (has_tool_calls) {
|
||||
// tool call must be sent as two updates
|
||||
json initial_ret = json{{"choices", json::array({json{
|
||||
json initial_ret = wrap_choices(json::array({
|
||||
json{
|
||||
{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta", delta}}})},
|
||||
{"created", t},
|
||||
{"id", completion_id},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
json second_ret = json{
|
||||
{"choices", choices},
|
||||
{"created", t},
|
||||
{"id", completion_id},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
{"delta", delta},
|
||||
}
|
||||
}));
|
||||
json second_ret = wrap_choices(choices);
|
||||
return std::vector<json>({initial_ret, second_ret});
|
||||
}
|
||||
} else {
|
||||
|
@ -545,26 +545,22 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
|
|||
{"delta", json{{"role", "assistant"}}}}});
|
||||
} else {
|
||||
// We have to send this as two updates to conform to openai behavior
|
||||
json initial_ret = json{{"choices", json::array({json{
|
||||
json initial_ret = wrap_choices(json::array({
|
||||
json{
|
||||
{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta", json{
|
||||
{"role", "assistant"}
|
||||
}}}})},
|
||||
{"created", t},
|
||||
{"id", completion_id},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
json second_ret = json{
|
||||
{"choices", json::array({json{{"finish_reason", nullptr},
|
||||
{"role", "assistant"},
|
||||
}},
|
||||
}
|
||||
}));
|
||||
json second_ret = wrap_choices(json::array({
|
||||
json{
|
||||
{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta", delta}}})},
|
||||
{"created", t},
|
||||
{"id", completion_id},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
{"delta", delta},
|
||||
}
|
||||
}));
|
||||
return std::vector<json>({initial_ret, second_ret});
|
||||
}
|
||||
} else {
|
||||
|
@ -582,13 +578,7 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
|
|||
}
|
||||
}
|
||||
|
||||
json ret = json {
|
||||
{"choices", choices},
|
||||
{"created", t},
|
||||
{"id", completion_id},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}
|
||||
};
|
||||
json ret = wrap_choices(choices);
|
||||
if (!finish_reason.empty()) {
|
||||
int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
|
||||
int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue