From a3aac23df169d4ea41419d81b7ffb9343a63af40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bjarke=20Viks=C3=B8e?= Date: Tue, 6 Aug 2024 21:26:27 +0200 Subject: [PATCH] server: when result doesn't fit in max_tokens, finish_reason should be length Fixes #8856 Be aware that finish_reason "length" was probably never reported before due to this bug. --- examples/server/utils.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index e6a1f0697..41cde6a7f 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -412,14 +412,15 @@ static json oaicompat_completion_params_parse( } static json format_final_response_oaicompat(const json & request, json result, const std::string & completion_id, bool streaming = false) { - bool stopped_word = result.count("stopped_word") != 0; + bool unfinished = result.count("stopped_word") == 0; + bool stopped_word = json_value(result, "stopped_word", false); bool stopped_eos = json_value(result, "stopped_eos", false); int num_tokens_predicted = json_value(result, "tokens_predicted", 0); int num_prompt_tokens = json_value(result, "tokens_evaluated", 0); std::string content = json_value(result, "content", std::string("")); std::string finish_reason = "length"; - if (stopped_word || stopped_eos) { + if (stopped_word || stopped_eos || unfinished) { finish_reason = "stop"; }