server: when result doesn't fit in max_tokens, finished_reason should be length

Fixes #8856

Be aware that finished_reason "length" was probably never reported before due to this bug.
Bjarke Viksøe 2024-08-06 21:26:27 +02:00
parent 1e6f6554aa
commit a3aac23df1


@@ -412,14 +412,15 @@ static json oaicompat_completion_params_parse(
 }
 static json format_final_response_oaicompat(const json & request, json result, const std::string & completion_id, bool streaming = false) {
-    bool stopped_word        = result.count("stopped_word") != 0;
+    bool unfinished          = result.count("stopped_word") == 0;
+    bool stopped_word        = json_value(result, "stopped_word", false);
     bool stopped_eos         = json_value(result, "stopped_eos", false);
     int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
     int num_prompt_tokens    = json_value(result, "tokens_evaluated", 0);
     std::string content      = json_value(result, "content", std::string(""));
     std::string finish_reason = "length";
-    if (stopped_word || stopped_eos) {
+    if (stopped_word || stopped_eos || unfinished) {
         finish_reason = "stop";
     }
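
Why the old code apparently never produced "length": result.count("stopped_word") only asks whether the key exists, and the final result evidently always carries that field even when its value is false, so the default finish_reason of "length" was always overwritten with "stop". The following is a minimal, self-contained sketch, not the server's code, that illustrates the count-vs-value distinction; it assumes nlohmann::json, uses a simplified stand-in for the server's json_value helper, and fills the result object with made-up illustrative values.

// Sketch only: shows why "key present" differs from "value is true".
// json_value below is a simplified stand-in, not the server's helper.
#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

// Simplified stand-in: return result[key] if present, otherwise the default.
template <typename T>
static T json_value(const json & body, const std::string & key, const T & default_value) {
    return body.contains(key) ? body.at(key).get<T>() : default_value;
}

int main() {
    // Illustrative final result for a generation that ran into the token
    // limit: the stop flags are present, but their values are false.
    json result = {
        {"stopped_word",     false},
        {"stopped_eos",      false},
        {"tokens_predicted", 16},
        {"tokens_evaluated", 8},
        {"content",          "truncated output ..."},
    };

    // Old check: "is the key present?" - true even though the value is false,
    // so finish_reason was unconditionally overwritten with "stop".
    bool old_stopped_word = result.count("stopped_word") != 0;

    // New check: read the actual boolean value.
    bool stopped_word = json_value(result, "stopped_word", false);
    bool stopped_eos  = json_value(result, "stopped_eos", false);

    std::string old_finish_reason = (old_stopped_word || stopped_eos) ? "stop" : "length";
    std::string new_finish_reason = (stopped_word     || stopped_eos) ? "stop" : "length";

    std::cout << "old: " << old_finish_reason << "\n";  // prints "stop" (wrong)
    std::cout << "new: " << new_finish_reason << "\n";  // prints "length"
    return 0;
}

The extra unfinished flag in the diff additionally keeps finish_reason at "stop" for results that carry no "stopped_word" field at all; the sketch only covers the count-vs-value distinction that caused the bug. With the fix, a completion that is cut short by max_tokens should report finish_reason "length" in the OpenAI-compatible response.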