Fix new line issue with chat template, disable template when in-prefix/suffix is set (#8203)

* preserve new line llama_chat_format_single * disable chat template if in-prefix/suffix is set * remove redundant change
2024-06-30 20:27:13 +02:00 · 2024-06-30 20:27:13 +02:00 · 9ef0780062
commit 9ef0780062
parent 1c5eba6f8e
4 changed files with 23 additions and 9 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@ -1014,16 +1014,19 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
    }
    if (arg == "--in-prefix-bos") {
        params.input_prefix_bos = true;
+        params.enable_chat_template = false;
        return true;
    }
    if (arg == "--in-prefix") {
        CHECK_ARG
        params.input_prefix = argv[i];
+        params.enable_chat_template = false;
        return true;
    }
    if (arg == "--in-suffix") {
        CHECK_ARG
        params.input_suffix = argv[i];
+        params.enable_chat_template = false;
        return true;
    }
    if (arg == "--spm-infill") {
@ -1406,7 +1409,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
                                                                        "halt generation at PROMPT, return control in interactive mode\n"
                                                                        "can be specified more than once for multiple prompts" });
    options.push_back({ "main",        "-sp,   --special",              "special tokens output enabled (default: %s)", params.special ? "true" : "false" });
-    options.push_back({ "main",        "-cnv,  --conversation",         "run in conversation mode (does not print special tokens and suffix/prefix) (default: %s)", params.conversation ? "true" : "false" });
+    options.push_back({ "main",        "-cnv,  --conversation",         "run in conversation mode (does not print special tokens and suffix/prefix, use default chat template) (default: %s)", params.conversation ? "true" : "false" });
    options.push_back({ "main infill", "-i,    --interactive",          "run in interactive mode (default: %s)", params.interactive ? "true" : "false" });
    options.push_back({ "main infill", "-if,   --interactive-first",    "run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false" });
    options.push_back({ "main infill", "-mli,  --multiline-input",      "allows you to write or paste multiple lines without ending each in '\\'" });
@ -2668,12 +2671,19 @@ std::string llama_chat_format_single(const struct llama_model * model,
        const std::vector<llama_chat_msg> & past_msg,
        const llama_chat_msg & new_msg,
        bool add_ass) {
+    std::ostringstream ss;
    auto fmt_past_msg = llama_chat_apply_template(model, tmpl, past_msg, false);
    std::vector<llama_chat_msg> chat_new(past_msg);
+    // if the past_msg ends with a newline, we must preserve it in the formatted version
+    if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
+        ss << "\n";
+    };
+    // format chat with new_msg
    chat_new.push_back(new_msg);
    auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass);
-    auto formatted = fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
-    return formatted;
+    // get the diff part
+    ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
+    return ss.str();
 }

 std::string llama_chat_format_example(const struct llama_model * model,