From c4cf0e9075f7c3815de03395efe19ff2110f83e4 Mon Sep 17 00:00:00 2001 From: HanishKVC Date: Mon, 22 Apr 2024 23:48:24 +0530 Subject: [PATCH] ChatON:Cleanup: BeginEnd, Debug log Update the note Rename global-prefix|suffix to global-begin|end. Rename chat-apply-template to chat-apply-template-single, as it handles only a single message. Add some debug log messages to the helper functions --- common/chaton.hpp | 29 +++++++++++++++++++---------- examples/chaton_meta.json | 12 ++++++------ examples/main/main.cpp | 2 +- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/common/chaton.hpp b/common/chaton.hpp index c62ab146e..5db64c631 100644 --- a/common/chaton.hpp +++ b/common/chaton.hpp @@ -1,14 +1,18 @@ #pragma once /*** - * Keep chatting with model and needed role tagging using special tokens simple and flexible, while building on existing interactive flow + * Keep chatting with model and needed role tagging using special tokens simple and flexible, + * while building on existing interactive flow and its in-prefix, in-suffix and antiprompt/reverse-prompt + * * 1. Use a json file to configure the needed tags for each of the supported chat-handshake-template-standard * a. system-prefix, system-suffix, - * b. user-prefix, user-suffix, + * b. user-prefix, user-suffix, assistant-prefix + * * these override the in-prefix and in-suffix * c. reverse-prompt - * d. global-begin-marker, global-end-marker - * e. per-msg-begin-marker, per-msg-end-marker - * f. is per-msg-begin-marker used for system+user combo + * d. Later if required look at adding + * * global-begin-marker, global-end-marker + * * per-msg-begin-marker, per-msg-end-marker + * * is system-per-msg-end-marker and user-per-msg-begin-marker used for system+user combo * 2. Give the below option to user wrt system prompt, this should give the flexibility to either keep system prompt simple or complex in a flexible yet simple way. * a. 
the system prompt they specify using -f, is used as is with parse_special when tokenising or * b. whether the system prefix and suffix is added, but without parse_special tokenisation of system-prompt provided by user. @@ -51,19 +55,24 @@ inline void chaton_meta_dump() { LOG_TEELN("\n\nINFO:%s:ChatOn Meta\n%s", __func__, conMeta.dump(4).c_str()); } -inline std::string chaton_tmpl_apply(const std::string &tmpl, const std::string &role, const std::string &content) { +inline std::string chaton_tmpl_apply_single(const std::string &tmpl, const std::string &role, const std::string &content) { std::stringstream ss; - ss << conMeta[tmpl]["global"]["prefix"]; + ss << conMeta[tmpl]["global"]["begin"]; ss << conMeta[tmpl][role]["prefix"] << content << conMeta[tmpl][role]["suffix"]; - ss << conMeta[tmpl]["global"]["suffix"]; + ss << conMeta[tmpl]["global"]["end"]; std::string taggedStr = ss.str(); + LOG_TEELN("DBUG:%s:%s:%s:%s", __func__, tmpl.c_str(), role.c_str(), taggedStr.c_str()); return taggedStr; } inline std::string chaton_tmpl_role_part(const std::string &tmpl, const std::string &role, const std::string &part) { - return conMeta[tmpl][role][part]; + std::string got = conMeta[tmpl][role][part]; + LOG_TEELN("DBUG:%s:%s:%s:%s:%s", __func__, tmpl.c_str(), role.c_str(), part.c_str(), got.c_str()); + return got; } inline std::string chaton_tmpl_part(const std::string &tmpl, const std::string &part) { - return conMeta[tmpl][part]; + std::string got = conMeta[tmpl][part]; + LOG_TEELN("DBUG:%s:%s:%s:%s", __func__, tmpl.c_str(), part.c_str(), got.c_str()); + return got; } diff --git a/examples/chaton_meta.json b/examples/chaton_meta.json index 8301f4221..4711ae7cd 100644 --- a/examples/chaton_meta.json +++ b/examples/chaton_meta.json @@ -2,8 +2,8 @@ { "llama2": { "global": { - "prefix": "", - "suffix": "" + "begin": "", + "end": "" }, "system": { "prefix": "[INST] <>\n", @@ -21,8 +21,8 @@ }, "llama3": { "global": { - "prefix": "", - "suffix": "" + "begin": "", + "end": "" }, 
"system": { "prefix": "<|start_header_id|>system<|end_header_id|>\n", @@ -40,8 +40,8 @@ }, "chatml": { "global": { - "prefix": "", - "suffix": "" + "begin": "", + "end": "" }, "system": { "prefix": "<|im_start|>system\n", diff --git a/examples/main/main.cpp b/examples/main/main.cpp index f58888e99..d16312a5e 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -262,7 +262,7 @@ int main(int argc, char ** argv) { params.prompt = "<|im_start|>system\n" + params.prompt + "<|im_end|>"; } if (params.chaton) { - params.prompt = chaton_tmpl_apply(params.chaton_template_id, "system", params.prompt); + params.prompt = chaton_tmpl_apply_single(params.chaton_template_id, "system", params.prompt); } embd_inp = ::llama_tokenize(ctx, params.prompt, true, true); } else {