ChatON:Cleanup: BeginEnd, Debug log
Update the note. Rename global-prefix|suffix to global-begin|end. Rename chat-apply-template to chat-apply-template-single, as it handles only a single message. Add some debug log messages to the helper functions.
This commit is contained in:
parent
d87d27512e
commit
c4cf0e9075
3 changed files with 26 additions and 17 deletions
|
@ -1,14 +1,18 @@
|
|||
#pragma once
|
||||
|
||||
/***
|
||||
* Keep chatting with model and needed role tagging using special tokens simple and flexible, while building on existing interactive flow
|
||||
* Keep chatting with model and needed role tagging using special tokens simple and flexible,
|
||||
* while building on existing interactive flow and its in-prefix, in-suffix and antiprompt/reverse-prompt
|
||||
*
|
||||
* 1. Use a json file to configure the needed tags for each of the supported chat-handshake-template-standard
|
||||
* a. system-prefix, system-suffix,
|
||||
* b. user-prefix, user-suffix,
|
||||
* b. user-prefix, user-suffix, assistant-prefix
|
||||
* * these override the in-prefix and in-suffix
|
||||
* c. reverse-prompt
|
||||
* d. global-begin-marker, global-end-marker
|
||||
* e. per-msg-begin-marker, per-msg-end-marker
|
||||
* f. is per-msg-begin-marker used for system+user combo
|
||||
* d. Later if required look at adding
|
||||
* * global-begin-marker, global-end-marker
|
||||
* * per-msg-begin-marker, per-msg-end-marker
|
||||
* * is system-per-msg-end-marker and user-per-msg-begin-marker used for system+user combo
|
||||
* 2. Give the below option to user wrt system prompt, this should give the flexibility to either keep system prompt simple or complex in a flexible yet simple way.
|
||||
* a. the system prompt they specify using -f, is used as is with parse_special when tokenising or
|
||||
* b. whether the system prefix and suffix are added, but without parse_special tokenisation of system-prompt provided by user.
|
||||
|
@ -51,19 +55,24 @@ inline void chaton_meta_dump() {
|
|||
LOG_TEELN("\n\nINFO:%s:ChatOn Meta\n%s", __func__, conMeta.dump(4).c_str());
|
||||
}
|
||||
|
||||
inline std::string chaton_tmpl_apply(const std::string &tmpl, const std::string &role, const std::string &content) {
|
||||
inline std::string chaton_tmpl_apply_single(const std::string &tmpl, const std::string &role, const std::string &content) {
|
||||
std::stringstream ss;
|
||||
ss << conMeta[tmpl]["global"]["prefix"];
|
||||
ss << conMeta[tmpl]["global"]["begin"];
|
||||
ss << conMeta[tmpl][role]["prefix"] << content << conMeta[tmpl][role]["suffix"];
|
||||
ss << conMeta[tmpl]["global"]["suffix"];
|
||||
ss << conMeta[tmpl]["global"]["end"];
|
||||
std::string taggedStr = ss.str();
|
||||
LOG_TEELN("DBUG:%s:%s:%s:%s", __func__, tmpl.c_str(), role.c_str(), taggedStr.c_str());
|
||||
return taggedStr;
|
||||
}
|
||||
|
||||
inline std::string chaton_tmpl_role_part(const std::string &tmpl, const std::string &role, const std::string &part) {
|
||||
return conMeta[tmpl][role][part];
|
||||
std::string got = conMeta[tmpl][role][part];
|
||||
LOG_TEELN("DBUG:%s:%s:%s:%s:%s", __func__, tmpl.c_str(), role.c_str(), part.c_str(), got.c_str());
|
||||
return got;
|
||||
}
|
||||
|
||||
inline std::string chaton_tmpl_part(const std::string &tmpl, const std::string &part) {
|
||||
return conMeta[tmpl][part];
|
||||
std::string got = conMeta[tmpl][part];
|
||||
LOG_TEELN("DBUG:%s:%s:%s:%s", __func__, tmpl.c_str(), part.c_str(), got.c_str());
|
||||
return got;
|
||||
}
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
{
|
||||
"llama2": {
|
||||
"global": {
|
||||
"prefix": "",
|
||||
"suffix": ""
|
||||
"begin": "",
|
||||
"end": ""
|
||||
},
|
||||
"system": {
|
||||
"prefix": "[INST] <<SYS>>\n",
|
||||
|
@ -21,8 +21,8 @@
|
|||
},
|
||||
"llama3": {
|
||||
"global": {
|
||||
"prefix": "",
|
||||
"suffix": ""
|
||||
"begin": "",
|
||||
"end": ""
|
||||
},
|
||||
"system": {
|
||||
"prefix": "<|start_header_id|>system<|end_header_id|>\n",
|
||||
|
@ -40,8 +40,8 @@
|
|||
},
|
||||
"chatml": {
|
||||
"global": {
|
||||
"prefix": "",
|
||||
"suffix": ""
|
||||
"begin": "",
|
||||
"end": ""
|
||||
},
|
||||
"system": {
|
||||
"prefix": "<|im_start|>system\n",
|
||||
|
|
|
@ -262,7 +262,7 @@ int main(int argc, char ** argv) {
|
|||
params.prompt = "<|im_start|>system\n" + params.prompt + "<|im_end|>";
|
||||
}
|
||||
if (params.chaton) {
|
||||
params.prompt = chaton_tmpl_apply(params.chaton_template_id, "system", params.prompt);
|
||||
params.prompt = chaton_tmpl_apply_single(params.chaton_template_id, "system", params.prompt);
|
||||
}
|
||||
embd_inp = ::llama_tokenize(ctx, params.prompt, true, true);
|
||||
} else {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue