From f94fed92d31439f0e3ef66ace79fbf251a0052e5 Mon Sep 17 00:00:00 2001 From: HanishKVC Date: Sun, 12 May 2024 15:59:37 +0530 Subject: [PATCH] ChatON+MetaHpp: Had forgotten to conv reverse-prompt Also has dump was using get_value calls with fallback to default, so it wasnt identifying the missed field. Have fixed both of those. Also reconverted meta json file. Misc: interesting avesham and aattam --- common/chaton.hpp | 15 ++++++++++----- common/chaton_meta.hpp | 14 ++++++++++++++ scripts/chaton-meta-json-to-hpp.py | 5 +++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/common/chaton.hpp b/common/chaton.hpp index 4f6436dbb..83ba104d2 100644 --- a/common/chaton.hpp +++ b/common/chaton.hpp @@ -781,6 +781,11 @@ inline bool _chaton_meta_dump(std::string &tmpl) { std::string assistantPrefix = gCT.get_value(tmpl, { K_ASSISTANT, K_PREFIX }); std::string assistantSuffix = gCT.get_value(tmpl, { K_ASSISTANT, K_SUFFIX }); std::string assistantEnd = gCT.get_value(tmpl, { K_ASSISTANT, K_END }); + std::string reversePrompt = gCT.get_value(tmpl, { K_REVERSE_PROMPT }); + bool systemHasSuffix = gCT.get_value(tmpl, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }); + bool systemHasEnd = gCT.get_value(tmpl, { K_SYSTEMUSER_SYSTEM_HAS_END }); + bool userHasBegin = gCT.get_value(tmpl, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }); + bool userHasPrefix = gCT.get_value(tmpl, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }); LOGXLN("INFO:%s:%s:%s", __func__, "global->begin", globalBegin.c_str()); LOGXLN("INFO:%s:%s:%s", __func__, "global->end", globalEnd.c_str()); @@ -796,11 +801,11 @@ inline bool _chaton_meta_dump(std::string &tmpl) { LOGXLN("INFO:%s:%s:%s", __func__, "assistant->prefix", assistantPrefix.c_str()); LOGXLN("INFO:%s:%s:%s", __func__, "assistant->suffix", assistantSuffix.c_str()); LOGXLN("INFO:%s:%s:%s", __func__, "assistant->end", assistantEnd.c_str()); - LOGXLN("INFO:%s:%s:%s", __func__, K_REVERSE_PROMPT, chaton_tmpl_kv(tmpl, K_REVERSE_PROMPT).c_str()); - LOGXLN("INFO:%s:%s:%d", __func__, K_SYSTEMUSER_SYSTEM_HAS_SUFFIX, chaton_tmpl_kv_bool(tmpl, K_SYSTEMUSER_SYSTEM_HAS_SUFFIX)); - LOGXLN("INFO:%s:%s:%d", __func__, K_SYSTEMUSER_SYSTEM_HAS_END, chaton_tmpl_kv_bool(tmpl, K_SYSTEMUSER_SYSTEM_HAS_END)); - LOGXLN("INFO:%s:%s:%d", __func__, K_SYSTEMUSER_1ST_USER_HAS_BEGIN, chaton_tmpl_kv_bool(tmpl, K_SYSTEMUSER_1ST_USER_HAS_BEGIN)); - LOGXLN("INFO:%s:%s:%d", __func__, K_SYSTEMUSER_1ST_USER_HAS_PREFIX, chaton_tmpl_kv_bool(tmpl, K_SYSTEMUSER_1ST_USER_HAS_PREFIX)); + LOGXLN("INFO:%s:%s:%s", __func__, K_REVERSE_PROMPT, reversePrompt.c_str()); + LOGXLN("INFO:%s:%s:%d", __func__, K_SYSTEMUSER_SYSTEM_HAS_SUFFIX, systemHasSuffix); + LOGXLN("INFO:%s:%s:%d", __func__, K_SYSTEMUSER_SYSTEM_HAS_END, systemHasEnd); + LOGXLN("INFO:%s:%s:%d", __func__, K_SYSTEMUSER_1ST_USER_HAS_BEGIN, userHasBegin); + LOGXLN("INFO:%s:%s:%d", __func__, K_SYSTEMUSER_1ST_USER_HAS_PREFIX, userHasPrefix); if (!userEnd.empty()) { LOG_TEELN("WARN:%s:User->End seems to be set to [%s], do cross check if this is proper and needed", __func__, userEnd.c_str()); diff --git a/common/chaton_meta.hpp b/common/chaton_meta.hpp index 3db8c08be..3516d5bb9 100644 --- a/common/chaton_meta.hpp +++ b/common/chaton_meta.hpp @@ -19,6 +19,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "" }, { "assistant-suffix", "" }, { "assistant-end", "" }, + { "reverse-prompt", "" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", false }, { "systemuser-1st-user-has-begin", false }, @@ -39,6 +40,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "<|start_header_id|>assistant<|end_header_id|>\n" }, { "assistant-suffix", "<|eot_id|>\n\n" }, { "assistant-end", "" }, + { "reverse-prompt", "<|eot_id|>" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", true }, { "systemuser-1st-user-has-begin", true }, @@ -59,6 +61,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "<|im_start|>assistant\n" }, { "assistant-suffix", "<|im_end|>\n" }, { "assistant-end", "" }, + { "reverse-prompt", "<|im_start|>user\n" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", true }, { "systemuser-1st-user-has-begin", true }, @@ -79,6 +82,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "<|assistant|>\n" }, { "assistant-suffix", "" }, { "assistant-end", "\n" }, + { "reverse-prompt", "" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", true }, { "systemuser-1st-user-has-begin", true }, @@ -99,6 +103,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "model\n" }, { "assistant-suffix", "\n" }, { "assistant-end", "" }, + { "reverse-prompt", "" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", false }, { "systemuser-1st-user-has-begin", true }, @@ -119,6 +124,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "### Response:\n" }, { "assistant-suffix", "\n<|EOT|>\n" }, { "assistant-end", "" }, + { "reverse-prompt", "<|EOT|>" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", false }, { "systemuser-1st-user-has-begin", false }, @@ -139,6 +145,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "Assistant: " }, { "assistant-suffix", " <|end▁of▁sentence|>\n" }, { "assistant-end", "" }, + { "reverse-prompt", "<|end▁of▁sentence|>" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", false }, { "systemuser-1st-user-has-begin", false }, @@ -159,6 +166,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "assistant\n" }, { "assistant-suffix", "" }, { "assistant-end", "\n" }, + { "reverse-prompt", "" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", true }, { "systemuser-1st-user-has-begin", true }, @@ -179,6 +187,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "" }, { "assistant-suffix", "" }, { "assistant-end", " \n" }, + { "reverse-prompt", "" }, { "systemuser-system-has-suffix", false }, { "systemuser-system-has-end", true }, { "systemuser-1st-user-has-begin", false }, @@ -199,6 +208,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "<|assistant|>\n" }, { "assistant-suffix", "<|end|>\n" }, { "assistant-end", "" }, + { "reverse-prompt", "<|end|>" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", true }, { "systemuser-1st-user-has-begin", true }, @@ -219,6 +229,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" }, { "assistant-suffix", "<|END_OF_TURN_TOKEN|>" }, { "assistant-end", "" }, + { "reverse-prompt", "<|END_OF_TURN_TOKEN|>" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", true }, { "systemuser-1st-user-has-begin", true }, @@ -239,6 +250,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "" }, { "assistant-suffix", "" }, { "assistant-end", "" }, + { "reverse-prompt", "" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", true }, { "systemuser-1st-user-has-begin", false }, @@ -259,6 +271,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "GPT4 Correct Assistant: " }, { "assistant-suffix", "<|end_of_turn|>" }, { "assistant-end", "" }, + { "reverse-prompt", "<|end_of_turn|>" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", true }, { "systemuser-1st-user-has-begin", true }, @@ -279,6 +292,7 @@ ChatTemplates gCT = {{ { "assistant-prefix", "ASSISTANT: " }, { "assistant-suffix", "\n" }, { "assistant-end", "" }, + { "reverse-prompt", "" }, { "systemuser-system-has-suffix", true }, { "systemuser-system-has-end", true }, { "systemuser-1st-user-has-begin", true }, diff --git a/scripts/chaton-meta-json-to-hpp.py b/scripts/chaton-meta-json-to-hpp.py index 6c08ca4db..d72c4cdf5 100755 --- a/scripts/chaton-meta-json-to-hpp.py +++ b/scripts/chaton-meta-json-to-hpp.py @@ -9,6 +9,9 @@ import json def kkv_str(j, tmpl, k1, k2, comma): print("\t\t{{ \"{}\", \"{}\" }}{}".format("{}-{}".format(k1,k2), repr(j[tmpl][k1][k2])[1:-1], comma)) +def kv_str(j, tmpl, k1, comma): + print("\t\t{{ \"{}\", \"{}\" }}{}".format(k1, repr(j[tmpl][k1])[1:-1], comma)) + def kv_bool(j, tmpl, k1, comma): print("\t\t{{ \"{}\", {} }}{}".format(k1, repr(j[tmpl][k1]).lower(), comma)) @@ -39,6 +42,8 @@ for tmpl in j: kkv_str(j, tmpl, "assistant", "suffix", ",") kkv_str(j, tmpl, "assistant", "end", ",") + kv_str(j, tmpl, "reverse-prompt", ",") + kv_bool(j, tmpl, "systemuser-system-has-suffix", ",") kv_bool(j, tmpl, "systemuser-system-has-end", ",") kv_bool(j, tmpl, "systemuser-1st-user-has-begin", ",")