From 0a8797b28eacb3916f0b9be137d62c9725b66918 Mon Sep 17 00:00:00 2001
From: HanishKVC
Date: Sat, 20 Apr 2024 18:40:55 +0530
Subject: [PATCH] Main:Update to support chaton mode

Glanced through the existing interactive and chatml flows to
incorporate this flow. Need to look deeper later.

NOTE: Up to this point this is a reapplication of my initial go at
chaton, simplifying the amount of change done to existing code a bit
more.
---
 examples/main/main.cpp | 48 ++++++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 249fc2bb6..a073a7bfd 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -1,4 +1,5 @@
 #include "common.h"
+#include "chaton.hpp"
 #include "console.h"
 #include "llama.h"
 
@@ -251,11 +252,14 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
-    if (params.interactive_first || params.instruct || params.chatml || !params.prompt.empty() || session_tokens.empty()) {
-        LOG("tokenize the prompt\n");
+    if (params.interactive_first || params.instruct || params.chatml || params.chaton || !params.prompt.empty() || session_tokens.empty()) {
+        LOG("tokenize the prompt: %s\n", params.prompt.c_str());
         if (params.chatml) {
             params.prompt = "<|im_start|>system\n" + params.prompt + "<|im_end|>";
         }
+        if (params.chaton) {
+            params.prompt = llama_chat_apply_template_simple(params.chaton_template_id, "system", params.prompt, false);
+        }
         embd_inp = ::llama_tokenize(ctx, params.prompt, true, true);
     } else {
         LOG("use session tokens\n");
@@ -333,7 +337,7 @@ int main(int argc, char ** argv) {
     }
 
     // number of tokens to keep when resetting context
-    if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct || params.chatml) {
+    if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct || params.chatml || params.chaton) {
         params.n_keep = (int)embd_inp.size();
     } else {
         params.n_keep += add_bos; // always keep the BOS token
@@ -363,6 +367,19 @@ int main(int argc, char ** argv) {
         params.interactive_first = true;
         params.antiprompt.emplace_back("<|im_start|>user\n");
     }
+    // handle chaton mode; it adds on to any reverse prompt specified explicitly by the user
+    if (params.chaton) {
+        params.interactive_first = true;
+        std::vector<std::string> resp_ends = llama_chat_reverse_prompt(params.chaton_template_id);
+        if (resp_ends.size() == 0) {
+            LOG_TEELN("ERRR:%s:ChatOn:Unsupported ChatType:%s", __func__, params.chaton_template_id.c_str());
+            exit(1);
+        }
+        for (size_t i = 0; i < resp_ends.size(); i++)
+        {
+            params.antiprompt.emplace_back(resp_ends[i]);
+        }
+    }
 
     // enable interactive mode if interactive start is specified
     if (params.interactive_first) {
@@ -817,7 +834,7 @@ int main(int argc, char ** argv) {
 
             if (n_past > 0 && is_interacting) {
                 LOG("waiting for user input\n");
 
-                if (params.instruct || params.chatml) {
+                if (params.instruct || params.chatml || params.chaton) {
                     printf("\n> ");
                 }
@@ -876,15 +893,23 @@ int main(int argc, char ** argv) {
                     process_escapes(buffer);
                 }
 
-                const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
-                const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
-                const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
+                std::vector<llama_token> line_inp;
+                if (params.chaton) {
+                    std::string f_chat = llama_chat_apply_template_simple(params.chaton_template_id, "user", buffer.c_str(), true);
+                    line_inp = ::llama_tokenize(ctx, f_chat, false, true);
+                    LOG("formatted input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
+                    embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
+                } else {
+                    const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
+                    line_inp = ::llama_tokenize(ctx, buffer, false, false);
+                    const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
 
-                LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
+                    LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
 
-                embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end());
-                embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
-                embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());
+                    embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end());
+                    embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
+                    embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());
+                }
 
                 // instruct mode: insert response suffix
                 if (params.instruct) {
@@ -921,6 +946,7 @@ int main(int argc, char ** argv) {
         }
 
         // end of text token
+        // chaton is expected to be used along with the interactive argument, so not checking for chaton separately
        if (!embd.empty() && embd.back() == llama_token_eos(model) && !(params.instruct || params.interactive || params.chatml)) {
             LOG_TEE(" [end of text]\n");
             break;
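
For reference, the two helpers this patch calls, llama_chat_apply_template_simple() and llama_chat_reverse_prompt(), live in the new chaton.hpp, which is not part of this diff. The sketch below is a hypothetical reconstruction inferred purely from the call sites above, shown for a chatml-style template; the actual header in this PR may differ in names, behaviour, and supported templates.

// Hypothetical sketch of the chaton.hpp helpers -- inferred from their
// call sites in main.cpp above, NOT the actual header from this PR.
#include <string>
#include <vector>

// Wrap one message in the template's role markers. For user turns the
// caller requests a trailing assistant prefix so that generation
// continues as the assistant.
inline std::string llama_chat_apply_template_simple(
        const std::string & tmpl_id,
        const std::string & role,
        const std::string & content,
        bool add_assistant_prefix) {
    if (tmpl_id == "chatml") {
        std::string out = "<|im_start|>" + role + "\n" + content + "<|im_end|>\n";
        if (add_assistant_prefix) {
            out += "<|im_start|>assistant\n";
        }
        return out;
    }
    return content; // unknown template id: pass the message through unchanged
}

// Marker strings for the start of the next user turn; main.cpp registers
// these as antiprompts so generation stops at the end of a response.
inline std::vector<std::string> llama_chat_reverse_prompt(const std::string & tmpl_id) {
    if (tmpl_id == "chatml") {
        return { "<|im_start|>user\n" };
    }
    return {}; // empty => unsupported template id, main.cpp exits
}

Usage-wise, the patch implies that main gains a chaton switch plus a template id string (surfaced here as params.chaton and params.chaton_template_id); the exact CLI flag names would be defined in the arg-parsing side of the PR, which is also not shown in this diff.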