ChatON: Move loading from json file into its own file
Any program that wants to use a json file to update/extend chaton's configurable template data can include the new file chaton_json.hpp to get the required functionality. Update chaton_meta_ok, _chaton_meta_validate_dump and chaton_meta_load_json to either work with a passed ChatTemplates instance, or fall back to the compiled-in global instance of the same.
This commit is contained in:
parent
14c28e717e
commit
a3d641b555
4 changed files with 117 additions and 101 deletions
|
@ -240,12 +240,6 @@ const auto K_SYSTEMUSER_1ST_USER_HAS_BEGIN = "systemuser-1st-user-has-begin";
|
||||||
const auto K_SYSTEMUSER_1ST_USER_HAS_PREFIX = "systemuser-1st-user-has-prefix";
|
const auto K_SYSTEMUSER_1ST_USER_HAS_PREFIX = "systemuser-1st-user-has-prefix";
|
||||||
const auto K_REVERSE_PROMPT = "reverse-prompt";
|
const auto K_REVERSE_PROMPT = "reverse-prompt";
|
||||||
|
|
||||||
#define CHATON_JSON
|
|
||||||
#ifdef CHATON_JSON
|
|
||||||
#include <json.hpp>
|
|
||||||
using json = nlohmann::ordered_json;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -547,91 +541,6 @@ public:
|
||||||
#include "chaton_meta.hpp"
|
#include "chaton_meta.hpp"
|
||||||
//ChatTemplates gCT = {{}};
|
//ChatTemplates gCT = {{}};
|
||||||
|
|
||||||
#ifdef CHATON_JSON
|
|
||||||
|
|
||||||
// Get value corresponding to the specified hierarchy/chain of keys.
|
|
||||||
// Also throw a more informative exception, if it is not found.
|
|
||||||
template <typename SupportedType>
|
|
||||||
inline SupportedType json_get(json &j, const std::vector<std::string_view> &keys, const std::string &msgTag) {
|
|
||||||
json curJ = j;
|
|
||||||
std::stringstream skey;
|
|
||||||
int i = 0;
|
|
||||||
for(auto key: keys) {
|
|
||||||
if (i != 0) skey << "-";
|
|
||||||
i += 1;
|
|
||||||
skey << key;
|
|
||||||
if (curJ.contains(key)) {
|
|
||||||
curJ = curJ[key];
|
|
||||||
} else {
|
|
||||||
std::stringstream ss;
|
|
||||||
ss << "ERRR:ChatON:" << __func__ << ":" << msgTag << ":KeyChain [" << skey.str() << "] is missing";
|
|
||||||
throw std::runtime_error(ss.str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return curJ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update/Extend the compiled-in configurable template data (the meta) from the specified json file.
|
|
||||||
inline bool chaton_meta_load_json(const std::string &fname) {
|
|
||||||
std::ifstream f(fname);
|
|
||||||
json conMeta = json::parse(f);
|
|
||||||
for(auto it=conMeta.begin(); it != conMeta.end(); ++it) {
|
|
||||||
|
|
||||||
auto group = it.key();
|
|
||||||
auto curTmpl = conMeta[group];
|
|
||||||
|
|
||||||
std::string globalBegin = json_get<std::string>(curTmpl, { K_GLOBAL, K_BEGIN }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_GLOBAL, K_BEGIN }, globalBegin);
|
|
||||||
std::string globalEnd = json_get<std::string>(curTmpl, { K_GLOBAL, K_END }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_GLOBAL, K_END }, globalEnd);
|
|
||||||
|
|
||||||
std::string systemBegin = json_get<std::string>(curTmpl, { K_SYSTEM, K_BEGIN }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_SYSTEM, K_BEGIN }, systemBegin);
|
|
||||||
std::string systemPrefix = json_get<std::string>(curTmpl, { K_SYSTEM, K_PREFIX }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_SYSTEM, K_PREFIX }, systemPrefix);
|
|
||||||
std::string systemSuffix = json_get<std::string>(curTmpl, { K_SYSTEM, K_SUFFIX }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_SYSTEM, K_SUFFIX }, systemSuffix);
|
|
||||||
std::string systemEnd = json_get<std::string>(curTmpl, { K_SYSTEM, K_END }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_SYSTEM, K_END }, systemEnd);
|
|
||||||
|
|
||||||
std::string userBegin = json_get<std::string>(curTmpl, { K_USER, K_BEGIN }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_USER, K_BEGIN }, userBegin);
|
|
||||||
std::string userPrefix = json_get<std::string>(curTmpl, { K_USER, K_PREFIX }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_USER, K_PREFIX }, userPrefix);
|
|
||||||
std::string userSuffix = json_get<std::string>(curTmpl, { K_USER, K_SUFFIX }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_USER, K_SUFFIX }, userSuffix);
|
|
||||||
std::string userEnd = json_get<std::string>(curTmpl, { K_USER, K_END }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_USER, K_END }, userEnd);
|
|
||||||
|
|
||||||
std::string assistantBegin = json_get<std::string>(curTmpl, { K_ASSISTANT, K_BEGIN }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_ASSISTANT, K_BEGIN }, assistantBegin);
|
|
||||||
std::string assistantPrefix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_PREFIX }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_ASSISTANT, K_PREFIX }, assistantPrefix);
|
|
||||||
std::string assistantSuffix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_SUFFIX }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_ASSISTANT, K_SUFFIX }, assistantSuffix);
|
|
||||||
std::string assistantEnd = json_get<std::string>(curTmpl, { K_ASSISTANT, K_END }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_ASSISTANT, K_END }, assistantEnd);
|
|
||||||
|
|
||||||
std::string reversePrompt = json_get<std::string>(curTmpl, { K_REVERSE_PROMPT }, group);
|
|
||||||
gCT.set_value<std::string>(group, { K_REVERSE_PROMPT }, reversePrompt);
|
|
||||||
|
|
||||||
bool systemHasSuffix = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, group);
|
|
||||||
gCT.set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, systemHasSuffix);
|
|
||||||
bool systemHasEnd = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_END }, group);
|
|
||||||
gCT.set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_END }, systemHasEnd);
|
|
||||||
|
|
||||||
bool userHasBegin = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, group);
|
|
||||||
gCT.set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, userHasBegin);
|
|
||||||
bool userHasPrefix = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, group);
|
|
||||||
gCT.set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, userHasPrefix);
|
|
||||||
|
|
||||||
}
|
|
||||||
LDBUG_LN("%s", gCT.dump("", "DBUG:ChatONMetaLoad:ChatTemplates").c_str());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
inline bool chaton_tmpl_exists(const std::string &tmpl) {
|
inline bool chaton_tmpl_exists(const std::string &tmpl) {
|
||||||
return gCT.tmpl_exists(tmpl);
|
return gCT.tmpl_exists(tmpl);
|
||||||
|
@ -862,20 +771,25 @@ inline std::vector<llama_token> chaton_llama_tokenize_ex(
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validate specified chaton-template-id and inturn dump the contents
|
* Validate specified chaton-template-id and inturn dump the contents related to that
|
||||||
* related to that specific chat-handshake-template-standard.
|
* specific chat-handshake-template-standard, wrt the specified ChatTemplates.
|
||||||
|
* If ct is nullptr, then map to the compiled-in ChatTemplates global instance.
|
||||||
|
*
|
||||||
* ALERT: If no template-id is specified, it is ignored with a warning.
|
* ALERT: If no template-id is specified, it is ignored with a warning.
|
||||||
* NOTE: It optionally dumps the full loaded chaton templates data
|
* NOTE: It optionally dumps the full loaded chaton templates data
|
||||||
* NOTE: It uses tmpl_basiccheck, which raises exception, if all the required
|
* NOTE: It uses tmpl_basiccheck, which raises exception, if all the required
|
||||||
* keys/fields are not present wrt the specified template-standard/model-id.
|
* keys/fields are not present wrt the specified template-standard/model-id.
|
||||||
*/
|
*/
|
||||||
inline bool _chaton_meta_validate_dump(std::string &tmpl) {
|
inline bool _chaton_meta_validate_dump(std::string &tmpl, ChatTemplates *ct=nullptr) {
|
||||||
LDBUG_LN("\n\nINFO:%s:%s:\n%s", __func__, tmpl.c_str(), gCT.dump("", "INFO:ChatOnMetaValidateDump").c_str());
|
if (ct == nullptr) {
|
||||||
|
ct = &gCT;
|
||||||
|
}
|
||||||
|
LDBUG_LN("\n\nINFO:%s:%s:\n%s", __func__, tmpl.c_str(), ct->dump("", "INFO:ChatOnMetaValidateDump").c_str());
|
||||||
if (tmpl.empty()) {
|
if (tmpl.empty()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
if (gCT.tmpl_basiccheck(tmpl, ss, "INFO:ChatOnMetaValidateDump")) {
|
if (ct->tmpl_basiccheck(tmpl, ss, "INFO:ChatOnMetaValidateDump")) {
|
||||||
LOGXLN("%s", ss.str().c_str());
|
LOGXLN("%s", ss.str().c_str());
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
|
@ -884,8 +798,10 @@ inline bool _chaton_meta_validate_dump(std::string &tmpl) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Verify that specified chaton-template-id contains required fields using meta-validate-dump
|
* In the passed ChatTemplates instance, verify that specified chaton-template-id
|
||||||
|
* contains required fields using meta-validate-dump.
|
||||||
|
* If ct is nullptr, then map to the compiled-in ChatTemplates global instance.
|
||||||
*/
|
*/
|
||||||
inline bool chaton_meta_ok(std::string &tmpl) {
|
inline bool chaton_meta_ok(std::string &tmpl, ChatTemplates *ct=nullptr) {
|
||||||
return _chaton_meta_validate_dump(tmpl);
|
return _chaton_meta_validate_dump(tmpl, ct);
|
||||||
}
|
}
|
||||||
|
|
100
common/chaton_json.hpp
Normal file
100
common/chaton_json.hpp
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper to load chaton's configurable template data from json file
|
||||||
|
* By Humans for All
|
||||||
|
*
|
||||||
|
* Any program which wants to load configurable template data from json file,
|
||||||
|
* can include this file to get the needed helpers for same.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "chaton.hpp"
|
||||||
|
|
||||||
|
#include <json.hpp>
|
||||||
|
using json = nlohmann::ordered_json;
|
||||||
|
|
||||||
|
|
||||||
|
// Get value corresponding to the specified hierarchy/chain of keys.
|
||||||
|
// Also throw a more informative exception, if it is not found.
|
||||||
|
template <typename SupportedType>
|
||||||
|
inline SupportedType json_get(json &j, const std::vector<std::string_view> &keys, const std::string &msgTag) {
|
||||||
|
json curJ = j;
|
||||||
|
std::stringstream skey;
|
||||||
|
int i = 0;
|
||||||
|
for(auto key: keys) {
|
||||||
|
if (i != 0) skey << "-";
|
||||||
|
i += 1;
|
||||||
|
skey << key;
|
||||||
|
if (curJ.contains(key)) {
|
||||||
|
curJ = curJ[key];
|
||||||
|
} else {
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << "ERRR:ChatON:" << __func__ << ":" << msgTag << ":KeyChain [" << skey.str() << "] is missing";
|
||||||
|
throw std::runtime_error(ss.str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return curJ;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update/Extend the configurable template data in specified ChatTemplates instance from the specified json file.
|
||||||
|
// If nullptr is passed wrt ct, then update/extend the global compiled-in configurable template data.
|
||||||
|
inline bool chaton_meta_load_json(const std::string &fname, ChatTemplates *ct=nullptr) {
|
||||||
|
if (ct == nullptr) {
|
||||||
|
ct = &gCT;
|
||||||
|
}
|
||||||
|
std::ifstream f(fname);
|
||||||
|
json conMeta = json::parse(f);
|
||||||
|
for(auto it=conMeta.begin(); it != conMeta.end(); ++it) {
|
||||||
|
|
||||||
|
auto group = it.key();
|
||||||
|
auto curTmpl = conMeta[group];
|
||||||
|
|
||||||
|
std::string globalBegin = json_get<std::string>(curTmpl, { K_GLOBAL, K_BEGIN }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_GLOBAL, K_BEGIN }, globalBegin);
|
||||||
|
std::string globalEnd = json_get<std::string>(curTmpl, { K_GLOBAL, K_END }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_GLOBAL, K_END }, globalEnd);
|
||||||
|
|
||||||
|
std::string systemBegin = json_get<std::string>(curTmpl, { K_SYSTEM, K_BEGIN }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_SYSTEM, K_BEGIN }, systemBegin);
|
||||||
|
std::string systemPrefix = json_get<std::string>(curTmpl, { K_SYSTEM, K_PREFIX }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_SYSTEM, K_PREFIX }, systemPrefix);
|
||||||
|
std::string systemSuffix = json_get<std::string>(curTmpl, { K_SYSTEM, K_SUFFIX }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_SYSTEM, K_SUFFIX }, systemSuffix);
|
||||||
|
std::string systemEnd = json_get<std::string>(curTmpl, { K_SYSTEM, K_END }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_SYSTEM, K_END }, systemEnd);
|
||||||
|
|
||||||
|
std::string userBegin = json_get<std::string>(curTmpl, { K_USER, K_BEGIN }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_USER, K_BEGIN }, userBegin);
|
||||||
|
std::string userPrefix = json_get<std::string>(curTmpl, { K_USER, K_PREFIX }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_USER, K_PREFIX }, userPrefix);
|
||||||
|
std::string userSuffix = json_get<std::string>(curTmpl, { K_USER, K_SUFFIX }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_USER, K_SUFFIX }, userSuffix);
|
||||||
|
std::string userEnd = json_get<std::string>(curTmpl, { K_USER, K_END }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_USER, K_END }, userEnd);
|
||||||
|
|
||||||
|
std::string assistantBegin = json_get<std::string>(curTmpl, { K_ASSISTANT, K_BEGIN }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_ASSISTANT, K_BEGIN }, assistantBegin);
|
||||||
|
std::string assistantPrefix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_PREFIX }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_ASSISTANT, K_PREFIX }, assistantPrefix);
|
||||||
|
std::string assistantSuffix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_SUFFIX }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_ASSISTANT, K_SUFFIX }, assistantSuffix);
|
||||||
|
std::string assistantEnd = json_get<std::string>(curTmpl, { K_ASSISTANT, K_END }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_ASSISTANT, K_END }, assistantEnd);
|
||||||
|
|
||||||
|
std::string reversePrompt = json_get<std::string>(curTmpl, { K_REVERSE_PROMPT }, group);
|
||||||
|
ct->set_value<std::string>(group, { K_REVERSE_PROMPT }, reversePrompt);
|
||||||
|
|
||||||
|
bool systemHasSuffix = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, group);
|
||||||
|
ct->set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, systemHasSuffix);
|
||||||
|
bool systemHasEnd = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_END }, group);
|
||||||
|
ct->set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_END }, systemHasEnd);
|
||||||
|
|
||||||
|
bool userHasBegin = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, group);
|
||||||
|
ct->set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, userHasBegin);
|
||||||
|
bool userHasPrefix = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, group);
|
||||||
|
ct->set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, userHasPrefix);
|
||||||
|
|
||||||
|
}
|
||||||
|
LDBUG_LN("%s", ct->dump("", "DBUG:ChatONMetaLoad:ChatTemplates").c_str());
|
||||||
|
return true;
|
||||||
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "chaton.hpp"
|
#include "chaton_json.hpp"
|
||||||
|
|
||||||
#include "console.h"
|
#include "console.h"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
#include "chaton.hpp"
|
#include "chaton_json.hpp"
|
||||||
|
|
||||||
|
|
||||||
std::vector<std::string> templateIds = { "llama2", "llama3", "chatml",
|
std::vector<std::string> templateIds = { "llama2", "llama3", "chatml",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue