server: when result doesn't fit in max_tokens, finished_reason should be length

Fixes #8856

Be aware that finished_reason "length" was probably never reported before due to this bug.
Bjarke Viksøe 2024-08-06 21:26:27 +02:00
parent 1e6f6554aa
commit a3aac23df1


@@ -412,14 +412,15 @@ static json oaicompat_completion_params_parse(
 }
 static json format_final_response_oaicompat(const json & request, json result, const std::string & completion_id, bool streaming = false) {
-    bool stopped_word        = result.count("stopped_word") != 0;
+    bool unfinished          = result.count("stopped_word") == 0;
+    bool stopped_word        = json_value(result, "stopped_word", false);
     bool stopped_eos         = json_value(result, "stopped_eos", false);
     int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
     int num_prompt_tokens    = json_value(result, "tokens_evaluated", 0);
     std::string content      = json_value(result, "content", std::string(""));
     std::string finish_reason = "length";
-    if (stopped_word || stopped_eos) {
+    if (stopped_word || stopped_eos || unfinished) {
         finish_reason = "stop";
     }
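
Why the old code apparently never produced "length": result.count("stopped_word") only asks whether the key exists, and the final result evidently always carries that field even when its value is false, so the default finish_reason of "length" was always overwritten with "stop". The following is a minimal, self-contained sketch, not the server's code, that illustrates the count-vs-value distinction; it assumes nlohmann::json, uses a simplified stand-in for the server's json_value helper, and fills the result object with made-up illustrative values.

// Sketch only: shows why "key present" differs from "value is true".
// json_value below is a simplified stand-in, not the server's helper.
#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

// Simplified stand-in: return result[key] if present, otherwise the default.
template <typename T>
static T json_value(const json & body, const std::string & key, const T & default_value) {
    return body.contains(key) ? body.at(key).get<T>() : default_value;
}

int main() {
    // Illustrative final result for a generation that ran into the token
    // limit: the stop flags are present, but their values are false.
    json result = {
        {"stopped_word",     false},
        {"stopped_eos",      false},
        {"tokens_predicted", 16},
        {"tokens_evaluated", 8},
        {"content",          "truncated output ..."},
    };

    // Old check: "is the key present?" - true even though the value is false,
    // so finish_reason was unconditionally overwritten with "stop".
    bool old_stopped_word = result.count("stopped_word") != 0;

    // New check: read the actual boolean value.
    bool stopped_word = json_value(result, "stopped_word", false);
    bool stopped_eos  = json_value(result, "stopped_eos", false);

    std::string old_finish_reason = (old_stopped_word || stopped_eos) ? "stop" : "length";
    std::string new_finish_reason = (stopped_word     || stopped_eos) ? "stop" : "length";

    std::cout << "old: " << old_finish_reason << "\n";  // prints "stop" (wrong)
    std::cout << "new: " << new_finish_reason << "\n";  // prints "length"
    return 0;
}

The extra unfinished flag in the diff additionally keeps finish_reason at "stop" for results that carry no "stopped_word" field at all; the sketch only covers the count-vs-value distinction that caused the bug. With the fix, a completion that is cut short by max_tokens should report finish_reason "length" in the OpenAI-compatible response.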