From d7a877e2443ab60df538f726d4af8312a304215f Mon Sep 17 00:00:00 2001 From: ngxson Date: Thu, 4 Jul 2024 14:23:18 +0200 Subject: [PATCH] main: add need_insert_eot --- common/common.cpp | 5 ++++- examples/main/main.cpp | 22 +++++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 2c05a4d4a..28118ee34 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1409,7 +1409,9 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param "halt generation at PROMPT, return control in interactive mode\n" "can be specified more than once for multiple prompts" }); options.push_back({ "main", "-sp, --special", "special tokens output enabled (default: %s)", params.special ? "true" : "false" }); - options.push_back({ "main", "-cnv, --conversation", "run in conversation mode (does not print special tokens and suffix/prefix, use default chat template) (default: %s)", params.conversation ? "true" : "false" }); + options.push_back({ "main", "-cnv, --conversation", "run in conversation mode, does not print special tokens and suffix/prefix\n" + "if suffix/prefix are not specified, default chat template will be used\n" + "(default: %s)", params.conversation ? "true" : "false" }); options.push_back({ "main infill", "-i, --interactive", "run in interactive mode (default: %s)", params.interactive ? "true" : "false" }); options.push_back({ "main infill", "-if, --interactive-first", "run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false" }); options.push_back({ "main infill", "-mli, --multiline-input", "allows you to write or paste multiple lines without ending each in '\\'" }); @@ -1453,6 +1455,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param options.push_back({ "main", " --cfg-scale N", "strength of guidance (default: %.1f, 1.0 = disable)", (double)sparams.cfg_scale }); options.push_back({ "main", " --chat-template JINJA_TEMPLATE", "set custom jinja chat template (default: template taken from model's metadata)\n" + "if suffix/prefix are specified, template will be disabled\n" "only commonly used templates are accepted:\n" "https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" }); options.push_back({ "grammar" }); diff --git a/examples/main/main.cpp b/examples/main/main.cpp index d512953b9..1cef429a7 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -37,7 +37,8 @@ static gpt_params * g_params; static std::vector * g_input_tokens; static std::ostringstream * g_output_ss; static std::vector * g_output_tokens; -static bool is_interacting = false; +static bool is_interacting = false; +static bool need_insert_eot = false; static bool file_exists(const std::string & path) { std::ifstream f(path.c_str()); @@ -99,7 +100,8 @@ static void write_logfile( static void sigint_handler(int signo) { if (signo == SIGINT) { if (!is_interacting && g_params->interactive) { - is_interacting = true; + is_interacting = true; + need_insert_eot = true; } else { console::cleanup(); printf("\n"); @@ -224,7 +226,14 @@ int main(int argc, char ** argv) { __func__, n_ctx_train, n_ctx); } - LOG_TEE("%s: chat template example: %s\n", __func__, llama_chat_format_example(model, params.chat_template).c_str()); + // print chat template example in conversation mode + if (params.conversation) { + if (params.enable_chat_template) { + LOG_TEE("%s: chat template example: %s\n", __func__, llama_chat_format_example(model, params.chat_template).c_str()); + } else { + LOG_TEE("%s: in-suffix/prefix is specified, chat template will be disabled\n", __func__); + } + } // print system information { @@ -885,6 +894,13 @@ int main(int argc, char ** argv) { LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str()); + // if user stop generation mid-way, we must add EOT to finish model's last response + if (need_insert_eot && format_chat) { + llama_token eot = llama_token_eot(model); + embd_inp.push_back(eot == -1 ? llama_token_eos(model) : eot); + need_insert_eot = false; + } + embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end()); embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());