ChatON: Move loading from json file into its own file

Any program that wants to use a json file to update/extend
chaton's configurable template data can include this new file,
chaton_json.hpp, to get the required functionality.

Update chaton_meta_ok, _chaton_meta_validate_dump and
chaton_meta_load_json to either work with a passed ChatTemplates
instance, or fall back to the compiled-in global instance of the same.
This commit is contained in:
HanishKVC 2024-05-15 02:26:51 +05:30
parent 14c28e717e
commit a3d641b555
4 changed files with 117 additions and 101 deletions

View file

@ -240,12 +240,6 @@ const auto K_SYSTEMUSER_1ST_USER_HAS_BEGIN = "systemuser-1st-user-has-begin";
const auto K_SYSTEMUSER_1ST_USER_HAS_PREFIX = "systemuser-1st-user-has-prefix"; const auto K_SYSTEMUSER_1ST_USER_HAS_PREFIX = "systemuser-1st-user-has-prefix";
const auto K_REVERSE_PROMPT = "reverse-prompt"; const auto K_REVERSE_PROMPT = "reverse-prompt";
#define CHATON_JSON
#ifdef CHATON_JSON
#include <json.hpp>
using json = nlohmann::ordered_json;
#endif
/** /**
@ -547,91 +541,6 @@ public:
#include "chaton_meta.hpp" #include "chaton_meta.hpp"
//ChatTemplates gCT = {{}}; //ChatTemplates gCT = {{}};
#ifdef CHATON_JSON
/**
 * Get the value corresponding to the specified hierarchy/chain of keys,
 * converted to SupportedType.
 *
 * @param j       json node at which the lookup starts.
 * @param keys    chain of keys to follow, outermost first.
 * @param msgTag  tag placed in the exception message, to identify the caller's context.
 * @return the value found at the end of the key chain.
 * @throws std::runtime_error if any key in the chain is missing; the message
 *         names the dash-joined chain walked up to and including the missing key.
 *
 * The hierarchy is walked through a pointer, so no part of the json tree is
 * copied while descending; only the final value gets converted.
 */
template <typename SupportedType>
inline SupportedType json_get(json &j, const std::vector<std::string_view> &keys, const std::string &msgTag) {
    json *cur = &j;
    std::stringstream skey;
    bool first = true;
    for (auto key : keys) {
        // Build up the chain walked so far, for use in the error message.
        if (!first) skey << "-";
        first = false;
        skey << key;
        if (!cur->contains(key)) {
            std::stringstream ss;
            ss << "ERRR:ChatON:" << __func__ << ":" << msgTag << ":KeyChain [" << skey.str() << "] is missing";
            throw std::runtime_error(ss.str());
        }
        // Safe to index: contains() has confirmed cur is an object holding this key.
        cur = &(*cur)[key];
    }
    return cur->get<SupportedType>();
}
// Update/Extend the compiled-in configurable template data (the meta) from the specified json file.
inline bool chaton_meta_load_json(const std::string &fname) {
std::ifstream f(fname);
json conMeta = json::parse(f);
for(auto it=conMeta.begin(); it != conMeta.end(); ++it) {
auto group = it.key();
auto curTmpl = conMeta[group];
std::string globalBegin = json_get<std::string>(curTmpl, { K_GLOBAL, K_BEGIN }, group);
gCT.set_value<std::string>(group, { K_GLOBAL, K_BEGIN }, globalBegin);
std::string globalEnd = json_get<std::string>(curTmpl, { K_GLOBAL, K_END }, group);
gCT.set_value<std::string>(group, { K_GLOBAL, K_END }, globalEnd);
std::string systemBegin = json_get<std::string>(curTmpl, { K_SYSTEM, K_BEGIN }, group);
gCT.set_value<std::string>(group, { K_SYSTEM, K_BEGIN }, systemBegin);
std::string systemPrefix = json_get<std::string>(curTmpl, { K_SYSTEM, K_PREFIX }, group);
gCT.set_value<std::string>(group, { K_SYSTEM, K_PREFIX }, systemPrefix);
std::string systemSuffix = json_get<std::string>(curTmpl, { K_SYSTEM, K_SUFFIX }, group);
gCT.set_value<std::string>(group, { K_SYSTEM, K_SUFFIX }, systemSuffix);
std::string systemEnd = json_get<std::string>(curTmpl, { K_SYSTEM, K_END }, group);
gCT.set_value<std::string>(group, { K_SYSTEM, K_END }, systemEnd);
std::string userBegin = json_get<std::string>(curTmpl, { K_USER, K_BEGIN }, group);
gCT.set_value<std::string>(group, { K_USER, K_BEGIN }, userBegin);
std::string userPrefix = json_get<std::string>(curTmpl, { K_USER, K_PREFIX }, group);
gCT.set_value<std::string>(group, { K_USER, K_PREFIX }, userPrefix);
std::string userSuffix = json_get<std::string>(curTmpl, { K_USER, K_SUFFIX }, group);
gCT.set_value<std::string>(group, { K_USER, K_SUFFIX }, userSuffix);
std::string userEnd = json_get<std::string>(curTmpl, { K_USER, K_END }, group);
gCT.set_value<std::string>(group, { K_USER, K_END }, userEnd);
std::string assistantBegin = json_get<std::string>(curTmpl, { K_ASSISTANT, K_BEGIN }, group);
gCT.set_value<std::string>(group, { K_ASSISTANT, K_BEGIN }, assistantBegin);
std::string assistantPrefix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_PREFIX }, group);
gCT.set_value<std::string>(group, { K_ASSISTANT, K_PREFIX }, assistantPrefix);
std::string assistantSuffix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_SUFFIX }, group);
gCT.set_value<std::string>(group, { K_ASSISTANT, K_SUFFIX }, assistantSuffix);
std::string assistantEnd = json_get<std::string>(curTmpl, { K_ASSISTANT, K_END }, group);
gCT.set_value<std::string>(group, { K_ASSISTANT, K_END }, assistantEnd);
std::string reversePrompt = json_get<std::string>(curTmpl, { K_REVERSE_PROMPT }, group);
gCT.set_value<std::string>(group, { K_REVERSE_PROMPT }, reversePrompt);
bool systemHasSuffix = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, group);
gCT.set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, systemHasSuffix);
bool systemHasEnd = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_END }, group);
gCT.set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_END }, systemHasEnd);
bool userHasBegin = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, group);
gCT.set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, userHasBegin);
bool userHasPrefix = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, group);
gCT.set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, userHasPrefix);
}
LDBUG_LN("%s", gCT.dump("", "DBUG:ChatONMetaLoad:ChatTemplates").c_str());
return true;
}
#endif
inline bool chaton_tmpl_exists(const std::string &tmpl) { inline bool chaton_tmpl_exists(const std::string &tmpl) {
return gCT.tmpl_exists(tmpl); return gCT.tmpl_exists(tmpl);
@ -862,20 +771,25 @@ inline std::vector<llama_token> chaton_llama_tokenize_ex(
/** /**
* Validate specified chaton-template-id and inturn dump the contents * Validate specified chaton-template-id and inturn dump the contents related to that
* related to that specific chat-handshake-template-standard. * specific chat-handshake-template-standard, wrt the specified ChatTemplates.
* If ct is nullptr, then map to the compiled-in ChatTemplates global instance.
*
* ALERT: If no template-id is specified, it is ignored with a warning. * ALERT: If no template-id is specified, it is ignored with a warning.
* NOTE: It optionally dumps the full loaded chaton templates data * NOTE: It optionally dumps the full loaded chaton templates data
* NOTE: It uses tmpl_basiccheck, which raises exception, if all the required * NOTE: It uses tmpl_basiccheck, which raises exception, if all the required
* keys/fields are not present wrt the specified template-standard/model-id. * keys/fields are not present wrt the specified template-standard/model-id.
*/ */
inline bool _chaton_meta_validate_dump(std::string &tmpl) { inline bool _chaton_meta_validate_dump(std::string &tmpl, ChatTemplates *ct=nullptr) {
LDBUG_LN("\n\nINFO:%s:%s:\n%s", __func__, tmpl.c_str(), gCT.dump("", "INFO:ChatOnMetaValidateDump").c_str()); if (ct == nullptr) {
ct = &gCT;
}
LDBUG_LN("\n\nINFO:%s:%s:\n%s", __func__, tmpl.c_str(), ct->dump("", "INFO:ChatOnMetaValidateDump").c_str());
if (tmpl.empty()) { if (tmpl.empty()) {
return true; return true;
} }
std::stringstream ss; std::stringstream ss;
if (gCT.tmpl_basiccheck(tmpl, ss, "INFO:ChatOnMetaValidateDump")) { if (ct->tmpl_basiccheck(tmpl, ss, "INFO:ChatOnMetaValidateDump")) {
LOGXLN("%s", ss.str().c_str()); LOGXLN("%s", ss.str().c_str());
} else { } else {
return false; return false;
@ -884,8 +798,10 @@ inline bool _chaton_meta_validate_dump(std::string &tmpl) {
} }
/** /**
* Verify that specified chaton-template-id contains required fields using meta-validate-dump * In the passed ChatTemplates instance, verify that specified chaton-template-id
* contains required fields using meta-validate-dump.
* If ct is nullptr, then map to the compiled-in ChatTemplates global instance.
*/ */
inline bool chaton_meta_ok(std::string &tmpl) { inline bool chaton_meta_ok(std::string &tmpl, ChatTemplates *ct=nullptr) {
return _chaton_meta_validate_dump(tmpl); return _chaton_meta_validate_dump(tmpl, ct);
} }

100
common/chaton_json.hpp Normal file
View file

@ -0,0 +1,100 @@
#pragma once
/**
* Helper to load chaton's configurable template data from json file
* By Humans for All
*
* Any program which wants to load configurable template data from json file,
* can include this file to get the needed helpers for same.
*/
#include "chaton.hpp"
#include <json.hpp>
using json = nlohmann::ordered_json;
/**
 * Get the value corresponding to the specified hierarchy/chain of keys,
 * converted to SupportedType.
 *
 * @param j       json node at which the lookup starts.
 * @param keys    chain of keys to follow, outermost first.
 * @param msgTag  tag placed in the exception message, to identify the caller's context.
 * @return the value found at the end of the key chain.
 * @throws std::runtime_error if any key in the chain is missing; the message
 *         names the dash-joined chain walked up to and including the missing key.
 *
 * The hierarchy is walked through a pointer, so no part of the json tree is
 * copied while descending; only the final value gets converted.
 */
template <typename SupportedType>
inline SupportedType json_get(json &j, const std::vector<std::string_view> &keys, const std::string &msgTag) {
    json *cur = &j;
    std::stringstream skey;
    bool first = true;
    for (auto key : keys) {
        // Build up the chain walked so far, for use in the error message.
        if (!first) skey << "-";
        first = false;
        skey << key;
        if (!cur->contains(key)) {
            std::stringstream ss;
            ss << "ERRR:ChatON:" << __func__ << ":" << msgTag << ":KeyChain [" << skey.str() << "] is missing";
            throw std::runtime_error(ss.str());
        }
        // Safe to index: contains() has confirmed cur is an object holding this key.
        cur = &(*cur)[key];
    }
    return cur->get<SupportedType>();
}
// Update/Extend the configurable template data in specified ChatTemplates instance from the specified json file.
// If nullptr is passed wrt ct, then update/extend the global compiled-in configurable template data.
inline bool chaton_meta_load_json(const std::string &fname, ChatTemplates *ct=nullptr) {
if (ct == nullptr) {
ct = &gCT;
}
std::ifstream f(fname);
json conMeta = json::parse(f);
for(auto it=conMeta.begin(); it != conMeta.end(); ++it) {
auto group = it.key();
auto curTmpl = conMeta[group];
std::string globalBegin = json_get<std::string>(curTmpl, { K_GLOBAL, K_BEGIN }, group);
ct->set_value<std::string>(group, { K_GLOBAL, K_BEGIN }, globalBegin);
std::string globalEnd = json_get<std::string>(curTmpl, { K_GLOBAL, K_END }, group);
ct->set_value<std::string>(group, { K_GLOBAL, K_END }, globalEnd);
std::string systemBegin = json_get<std::string>(curTmpl, { K_SYSTEM, K_BEGIN }, group);
ct->set_value<std::string>(group, { K_SYSTEM, K_BEGIN }, systemBegin);
std::string systemPrefix = json_get<std::string>(curTmpl, { K_SYSTEM, K_PREFIX }, group);
ct->set_value<std::string>(group, { K_SYSTEM, K_PREFIX }, systemPrefix);
std::string systemSuffix = json_get<std::string>(curTmpl, { K_SYSTEM, K_SUFFIX }, group);
ct->set_value<std::string>(group, { K_SYSTEM, K_SUFFIX }, systemSuffix);
std::string systemEnd = json_get<std::string>(curTmpl, { K_SYSTEM, K_END }, group);
ct->set_value<std::string>(group, { K_SYSTEM, K_END }, systemEnd);
std::string userBegin = json_get<std::string>(curTmpl, { K_USER, K_BEGIN }, group);
ct->set_value<std::string>(group, { K_USER, K_BEGIN }, userBegin);
std::string userPrefix = json_get<std::string>(curTmpl, { K_USER, K_PREFIX }, group);
ct->set_value<std::string>(group, { K_USER, K_PREFIX }, userPrefix);
std::string userSuffix = json_get<std::string>(curTmpl, { K_USER, K_SUFFIX }, group);
ct->set_value<std::string>(group, { K_USER, K_SUFFIX }, userSuffix);
std::string userEnd = json_get<std::string>(curTmpl, { K_USER, K_END }, group);
ct->set_value<std::string>(group, { K_USER, K_END }, userEnd);
std::string assistantBegin = json_get<std::string>(curTmpl, { K_ASSISTANT, K_BEGIN }, group);
ct->set_value<std::string>(group, { K_ASSISTANT, K_BEGIN }, assistantBegin);
std::string assistantPrefix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_PREFIX }, group);
ct->set_value<std::string>(group, { K_ASSISTANT, K_PREFIX }, assistantPrefix);
std::string assistantSuffix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_SUFFIX }, group);
ct->set_value<std::string>(group, { K_ASSISTANT, K_SUFFIX }, assistantSuffix);
std::string assistantEnd = json_get<std::string>(curTmpl, { K_ASSISTANT, K_END }, group);
ct->set_value<std::string>(group, { K_ASSISTANT, K_END }, assistantEnd);
std::string reversePrompt = json_get<std::string>(curTmpl, { K_REVERSE_PROMPT }, group);
ct->set_value<std::string>(group, { K_REVERSE_PROMPT }, reversePrompt);
bool systemHasSuffix = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, group);
ct->set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, systemHasSuffix);
bool systemHasEnd = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_END }, group);
ct->set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_END }, systemHasEnd);
bool userHasBegin = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, group);
ct->set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, userHasBegin);
bool userHasPrefix = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, group);
ct->set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, userHasPrefix);
}
LDBUG_LN("%s", ct->dump("", "DBUG:ChatONMetaLoad:ChatTemplates").c_str());
return true;
}

View file

@ -1,5 +1,5 @@
#include "common.h" #include "common.h"
#include "chaton.hpp" #include "chaton_json.hpp"
#include "console.h" #include "console.h"
#include "llama.h" #include "llama.h"

View file

@ -7,7 +7,7 @@
#include <cassert> #include <cassert>
#include "llama.h" #include "llama.h"
#include "chaton.hpp" #include "chaton_json.hpp"
std::vector<std::string> templateIds = { "llama2", "llama3", "chatml", std::vector<std::string> templateIds = { "llama2", "llama3", "chatml",