ChatOn: Add SystemUser-1st-User-Has-Prefix flag support
Llama2 seems to need it, so chaton-meta-json sample file updated to use same.
This commit is contained in:
parent
f03dd2439f
commit
221ccd6462
2 changed files with 44 additions and 9 deletions
|
@ -9,10 +9,10 @@
|
||||||
* b. user-prefix, user-suffix, assistant-prefix
|
* b. user-prefix, user-suffix, assistant-prefix
|
||||||
* * these override the in-prefix and in-suffix
|
* * these override the in-prefix and in-suffix
|
||||||
* c. reverse-prompt
|
* c. reverse-prompt
|
||||||
* d. Later if required look at adding
|
* d. global-begin, global-end
|
||||||
* * global-begin-marker, global-end-marker
|
* d. systemuser-1st-user-has-prefix
|
||||||
* * per-msg-begin-marker, per-msg-end-marker
|
* * if a combination of 1 system message followed by 1 or more user messages is seen,
|
||||||
* * is system-per-msg-end-marker and user-per-msg-begin-marker used for system+user combo
|
* then include user prefix only if this flag is set.
|
||||||
* 2. Give the below option to user wrt system prompt, this should give the flexibility to either keep system prompt simple or complex in a flexible yet simple way.
|
* 2. Give the below option to user wrt system prompt, this should give the flexibility to either keep system prompt simple or complex in a flexible yet simple way.
|
||||||
* a. the system prompt they specify using -f, is used as is with parse_special when tokenising or
|
* a. the system prompt they specify using -f, is used as is with parse_special when tokenising or
|
||||||
* b. whether the system prefix and suffix is added, but without parse_special tokenisation of system-prompt provided by user.
|
* b. whether the system prefix and suffix is added, but without parse_special tokenisation of system-prompt provided by user.
|
||||||
|
@ -30,6 +30,14 @@
|
||||||
|
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
|
|
||||||
|
|
||||||
|
const auto K_SYSTEM = "system";
|
||||||
|
const auto K_USER = "user";
|
||||||
|
const auto K_PREFIX = "prefix";
|
||||||
|
const auto K_SUFFIX = "suffix";
|
||||||
|
const auto K_SYSTEMUSER_1ST_USER_HAS_PREFIX = "systemuser-1st-user-has-prefix";
|
||||||
|
|
||||||
|
|
||||||
using json = nlohmann::json;
|
using json = nlohmann::json;
|
||||||
|
|
||||||
json conMeta;
|
json conMeta;
|
||||||
|
@ -55,6 +63,7 @@ inline void chaton_meta_dump() {
|
||||||
LOG_TEELN("\n\nINFO:%s:ChatOn Meta\n%s", __func__, conMeta.dump(4).c_str());
|
LOG_TEELN("\n\nINFO:%s:ChatOn Meta\n%s", __func__, conMeta.dump(4).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return user-prefix + msg + user-suffix
|
||||||
// NOTE: This currently doesnt return about which parts of the tagged message contain tags and which parts the user message
|
// NOTE: This currently doesnt return about which parts of the tagged message contain tags and which parts the user message
|
||||||
inline std::string chaton_tmpl_apply_single(const std::string &tmpl, const std::string &role, const std::string &content) {
|
inline std::string chaton_tmpl_apply_single(const std::string &tmpl, const std::string &role, const std::string &content) {
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
|
@ -64,14 +73,37 @@ inline std::string chaton_tmpl_apply_single(const std::string &tmpl, const std::
|
||||||
return taggedStr;
|
return taggedStr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// global-begin + [role-prefix + msg + role-suffix] + global-end
|
||||||
|
// if there is a combination of system-user messages,
|
||||||
|
// then 1st user message will have user-prefix only if systemuser-1st-user-has-prefix is true
|
||||||
// NOTE: This currently doesnt return about which parts of the tagged message contain tags and which parts the user message
|
// NOTE: This currently doesnt return about which parts of the tagged message contain tags and which parts the user message
|
||||||
inline std::string chaton_tmpl_apply(const std::string &tmpl, const std::vector<llama_chat_message> &msgs) {
|
inline std::string chaton_tmpl_apply(const std::string &tmpl, const std::vector<llama_chat_message> &msgs) {
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << conMeta[tmpl]["global"]["begin"];
|
ss << conMeta[tmpl]["global"]["begin"];
|
||||||
for(auto msg: msgs) {
|
int cntSystem = 0;
|
||||||
|
int cntUser = 0;
|
||||||
|
int cntOthers = 0;
|
||||||
|
for(const auto msg: msgs) {
|
||||||
auto role = msg.role;
|
auto role = msg.role;
|
||||||
auto content = msg.content;
|
auto content = msg.content;
|
||||||
ss << conMeta[tmpl][role]["prefix"] << content << conMeta[tmpl][role]["suffix"];
|
auto prefix = conMeta[tmpl][role][K_PREFIX];
|
||||||
|
if (role == K_SYSTEM) {
|
||||||
|
cntSystem += 1;
|
||||||
|
ss << prefix;
|
||||||
|
} else if (role == K_USER) {
|
||||||
|
cntUser += 1;
|
||||||
|
if ((cntSystem == 1) && (cntUser == 1)) {
|
||||||
|
if (conMeta[tmpl][K_SYSTEMUSER_1ST_USER_HAS_PREFIX]) {
|
||||||
|
ss << prefix;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ss << prefix;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cntOthers += 1;
|
||||||
|
ss << prefix;
|
||||||
|
}
|
||||||
|
ss << content << conMeta[tmpl][role]["suffix"];
|
||||||
}
|
}
|
||||||
ss << conMeta[tmpl]["global"]["end"];
|
ss << conMeta[tmpl]["global"]["end"];
|
||||||
std::string taggedMsgs = ss.str();
|
std::string taggedMsgs = ss.str();
|
||||||
|
|
|
@ -17,7 +17,8 @@
|
||||||
"prefix": "",
|
"prefix": "",
|
||||||
"suffix": ""
|
"suffix": ""
|
||||||
},
|
},
|
||||||
"reverse-prompt": "</s>"
|
"reverse-prompt": "</s>",
|
||||||
|
"systemuser-1st-user-has-prefix": false
|
||||||
},
|
},
|
||||||
"llama3": {
|
"llama3": {
|
||||||
"global": {
|
"global": {
|
||||||
|
@ -36,7 +37,8 @@
|
||||||
"prefix": "<|start_header_id|>assistant<|end_header_id|>\n",
|
"prefix": "<|start_header_id|>assistant<|end_header_id|>\n",
|
||||||
"suffix": ""
|
"suffix": ""
|
||||||
},
|
},
|
||||||
"reverse-prompt": "<|eot_id|>"
|
"reverse-prompt": "<|eot_id|>",
|
||||||
|
"systemuser-1st-user-has-prefix": true
|
||||||
},
|
},
|
||||||
"chatml": {
|
"chatml": {
|
||||||
"global": {
|
"global": {
|
||||||
|
@ -55,7 +57,8 @@
|
||||||
"prefix": "<|im_start|>assistant\n",
|
"prefix": "<|im_start|>assistant\n",
|
||||||
"suffix": ""
|
"suffix": ""
|
||||||
},
|
},
|
||||||
"reverse-prompt": "<|im_start|>user\n"
|
"reverse-prompt": "<|im_start|>user\n",
|
||||||
|
"systemuser-1st-user-has-prefix": true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue