From a3d641b55549e0092163023c97ccbd977817706e Mon Sep 17 00:00:00 2001
From: HanishKVC <hanishkvc@gmail.com>
Date: Wed, 15 May 2024 02:26:51 +0530
Subject: [PATCH] ChatON: Move loading from json file into its own file

Any program which wants to use a json file to update/extend
chaton's configurable template data can include the new file
chaton_json.hpp to get the required functionality.

Update chaton_meta_ok, _chaton_meta_validate_dump and
chaton_meta_load_json to work with a passed ChatTemplates instance,
or to fall back to the compiled-in global instance if none is passed.
---
 common/chaton.hpp                   | 114 ++++------------------------
 common/chaton_json.hpp              | 100 ++++++++++++++++++++++++
 examples/main/main.cpp              |   2 +-
 tests/test-chat-template-chaton.cpp |   2 +-
 4 files changed, 117 insertions(+), 101 deletions(-)
 create mode 100644 common/chaton_json.hpp
diff --git a/common/chaton.hpp b/common/chaton.hpp
index b26bb237e..af19727d8 100644
--- a/common/chaton.hpp
+++ b/common/chaton.hpp
@@ -240,12 +240,6 @@ const auto K_SYSTEMUSER_1ST_USER_HAS_BEGIN = "systemuser-1st-user-has-begin";
 const auto K_SYSTEMUSER_1ST_USER_HAS_PREFIX = "systemuser-1st-user-has-prefix";
 const auto K_REVERSE_PROMPT = "reverse-prompt";
 
-#define CHATON_JSON
-#ifdef CHATON_JSON
-#include <json.hpp>
-using json = nlohmann::ordered_json;
-#endif
-
 
 
 /**
@@ -547,91 +541,6 @@ public:
 #include "chaton_meta.hpp"
 //ChatTemplates gCT = {{}};
 
-#ifdef CHATON_JSON
-
-// Get value corresponding to the specified hierarchy/chain of keys.
-// Also throw a more informative exception, if it is not found.
-template <typename SupportedType>
-inline SupportedType json_get(json &j, const std::vector<std::string_view> &keys, const std::string &msgTag) {
-    json curJ = j;
-    std::stringstream skey;
-    int i = 0;
-    for(auto key: keys) {
-        if (i != 0) skey << "-";
-        i += 1;
-        skey << key;
-        if (curJ.contains(key)) {
-            curJ = curJ[key];
-        } else {
-            std::stringstream ss;
-            ss << "ERRR:ChatON:" << __func__ << ":" << msgTag << ":KeyChain [" << skey.str() << "] is missing";
-            throw std::runtime_error(ss.str());
-        }
-    }
-    return curJ;
-}
-
-// Update/Extend the compiled-in configurable template data (the meta) from the specified json file.
-inline bool chaton_meta_load_json(const std::string &fname) {
-    std::ifstream f(fname);
-    json conMeta = json::parse(f);
-    for(auto it=conMeta.begin(); it != conMeta.end(); ++it) {
-
-        auto group = it.key();
-        auto curTmpl = conMeta[group];
-
-        std::string globalBegin = json_get<std::string>(curTmpl, { K_GLOBAL, K_BEGIN }, group);
-        gCT.set_value<std::string>(group, { K_GLOBAL, K_BEGIN }, globalBegin);
-        std::string globalEnd = json_get<std::string>(curTmpl, { K_GLOBAL, K_END }, group);
-        gCT.set_value<std::string>(group, { K_GLOBAL, K_END }, globalEnd);
-
-        std::string systemBegin = json_get<std::string>(curTmpl, { K_SYSTEM, K_BEGIN }, group);
-        gCT.set_value<std::string>(group, { K_SYSTEM, K_BEGIN }, systemBegin);
-        std::string systemPrefix = json_get<std::string>(curTmpl, { K_SYSTEM, K_PREFIX }, group);
-        gCT.set_value<std::string>(group, { K_SYSTEM, K_PREFIX }, systemPrefix);
-        std::string systemSuffix = json_get<std::string>(curTmpl, { K_SYSTEM, K_SUFFIX }, group);
-        gCT.set_value<std::string>(group, { K_SYSTEM, K_SUFFIX }, systemSuffix);
-        std::string systemEnd = json_get<std::string>(curTmpl, { K_SYSTEM, K_END }, group);
-        gCT.set_value<std::string>(group, { K_SYSTEM, K_END }, systemEnd);
-
-        std::string userBegin = json_get<std::string>(curTmpl, { K_USER, K_BEGIN }, group);
-        gCT.set_value<std::string>(group, { K_USER, K_BEGIN }, userBegin);
-        std::string userPrefix = json_get<std::string>(curTmpl, { K_USER, K_PREFIX }, group);
-        gCT.set_value<std::string>(group, { K_USER, K_PREFIX }, userPrefix);
-        std::string userSuffix = json_get<std::string>(curTmpl, { K_USER, K_SUFFIX }, group);
-        gCT.set_value<std::string>(group, { K_USER, K_SUFFIX }, userSuffix);
-        std::string userEnd = json_get<std::string>(curTmpl, { K_USER, K_END }, group);
-        gCT.set_value<std::string>(group, { K_USER, K_END }, userEnd);
-
-        std::string assistantBegin = json_get<std::string>(curTmpl, { K_ASSISTANT, K_BEGIN }, group);
-        gCT.set_value<std::string>(group, { K_ASSISTANT, K_BEGIN }, assistantBegin);
-        std::string assistantPrefix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_PREFIX }, group);
-        gCT.set_value<std::string>(group, { K_ASSISTANT, K_PREFIX }, assistantPrefix);
-        std::string assistantSuffix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_SUFFIX }, group);
-        gCT.set_value<std::string>(group, { K_ASSISTANT, K_SUFFIX }, assistantSuffix);
-        std::string assistantEnd = json_get<std::string>(curTmpl, { K_ASSISTANT, K_END }, group);
-        gCT.set_value<std::string>(group, { K_ASSISTANT, K_END }, assistantEnd);
-
-        std::string reversePrompt = json_get<std::string>(curTmpl, { K_REVERSE_PROMPT }, group);
-        gCT.set_value<std::string>(group, { K_REVERSE_PROMPT }, reversePrompt);
-
-        bool systemHasSuffix = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, group);
-        gCT.set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, systemHasSuffix);
-        bool systemHasEnd = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_END }, group);
-        gCT.set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_END }, systemHasEnd);
-
-        bool userHasBegin = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, group);
-        gCT.set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, userHasBegin);
-        bool userHasPrefix = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, group);
-        gCT.set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, userHasPrefix);
-
-    }
-    LDBUG_LN("%s", gCT.dump("", "DBUG:ChatONMetaLoad:ChatTemplates").c_str());
-    return true;
-}
-
-#endif
-
 
 inline bool chaton_tmpl_exists(const std::string &tmpl) {
     return gCT.tmpl_exists(tmpl);
@@ -862,20 +771,25 @@ inline std::vector<llama_token> chaton_llama_tokenize_ex(
 
 
 /**
- * Validate specified chaton-template-id and inturn dump the contents
- * related to that specific chat-handshake-template-standard.
+ * Validate the specified chaton-template-id and in turn dump the contents related to that
+ * specific chat-handshake-template-standard, with respect to the specified ChatTemplates.
+ * If ct is nullptr, then use the compiled-in ChatTemplates global instance.
+ *
  * ALERT: If no template-id is specified, it is ignored with a warning.
  * NOTE: It optionally dumps the full loaded chaton templates data
  * NOTE: It uses tmpl_basiccheck, which raises exception, if all the required
  * keys/fields are not present wrt the specified template-standard/model-id.
  */
-inline bool _chaton_meta_validate_dump(std::string &tmpl) {
-    LDBUG_LN("\n\nINFO:%s:%s:\n%s", __func__, tmpl.c_str(), gCT.dump("", "INFO:ChatOnMetaValidateDump").c_str());
+inline bool _chaton_meta_validate_dump(std::string &tmpl, ChatTemplates *ct=nullptr) {
+    if (ct == nullptr) {
+        ct = &gCT;
+    }
+    LDBUG_LN("\n\nINFO:%s:%s:\n%s", __func__, tmpl.c_str(), ct->dump("", "INFO:ChatOnMetaValidateDump").c_str());
     if (tmpl.empty()) {
         return true;
     }
     std::stringstream ss;
-    if (gCT.tmpl_basiccheck(tmpl, ss, "INFO:ChatOnMetaValidateDump")) {
+    if (ct->tmpl_basiccheck(tmpl, ss, "INFO:ChatOnMetaValidateDump")) {
         LOGXLN("%s", ss.str().c_str());
     } else {
         return false;
@@ -884,8 +798,10 @@ inline bool _chaton_meta_validate_dump(std::string &tmpl) {
 }
 
 /**
- * Verify that specified chaton-template-id contains required fields using meta-validate-dump
+ * In the passed ChatTemplates instance, verify that specified chaton-template-id
+ * contains required fields using meta-validate-dump.
+ * If ct is nullptr, then map to the compiled-in ChatTemplates global instance.
  */
-inline bool chaton_meta_ok(std::string &tmpl) {
-    return _chaton_meta_validate_dump(tmpl);
+inline bool chaton_meta_ok(std::string &tmpl, ChatTemplates *ct=nullptr) {
+    return _chaton_meta_validate_dump(tmpl, ct);
 }
diff --git a/common/chaton_json.hpp b/common/chaton_json.hpp
new file mode 100644
index 000000000..7915adcc4
--- /dev/null
+++ b/common/chaton_json.hpp
@@ -0,0 +1,100 @@
+#pragma once
+
+/**
+ * Helper to load chaton's configurable template data from json file
+ * By Humans for All
+ * 
+ * Any program which wants to load configurable template data from json file,
+ * can include this file to get the needed helpers for same.
+*/
+
+#include "chaton.hpp"
+
+#include <json.hpp>
+using json = nlohmann::ordered_json;
+
+
+// Walk the json j along the given chain of keys (outermost key first) and return the value found at the end of the chain.
+// Throws std::runtime_error, naming msgTag and the "-" joined key chain walked so far, if any key in the chain is missing.
+template <typename SupportedType>
+inline SupportedType json_get(json &j, const std::vector<std::string_view> &keys, const std::string &msgTag) {
+    json curJ = j; // walk on a copy; j itself is only read
+    std::stringstream skey; // key chain walked so far, used only in the error message
+    int i = 0;
+    for(auto key: keys) {
+        if (i != 0) skey << "-"; // separator goes between keys, so skip it before the 1st key
+        i += 1;
+        skey << key;
+        if (curJ.contains(key)) {
+            curJ = curJ[key]; // descend one level
+        } else {
+            std::stringstream ss;
+            ss << "ERRR:ChatON:" << __func__ << ":" << msgTag << ":KeyChain [" << skey.str() << "] is missing";
+            throw std::runtime_error(ss.str());
+        }
+    }
+    return curJ; // relies on json's conversion to SupportedType; NOTE(review): a type mismatch presumably throws from json - confirm
+}
+
+// Update/Extend the configurable template data in the specified ChatTemplates instance from the specified json file.
+// If ct is nullptr, the global compiled-in instance (gCT) is updated instead. Returns true whenever it returns at all.
+inline bool chaton_meta_load_json(const std::string &fname, ChatTemplates *ct=nullptr) {
+    if (ct == nullptr) {
+        ct = &gCT;
+    }
+    std::ifstream f(fname); // NOTE(review): open failure is not checked here; presumably json::parse then throws - confirm
+    json conMeta = json::parse(f);
+    for(auto it=conMeta.begin(); it != conMeta.end(); ++it) { // one iteration per top level entry of the json file
+
+        auto group = it.key(); // top level key; passed as the group to every set_value call below
+        auto curTmpl = conMeta[group]; // the json object holding this group's fields
+        // Every field read below is required: json_get throws if one is missing, and values set before that are not rolled back here.
+        std::string globalBegin = json_get<std::string>(curTmpl, { K_GLOBAL, K_BEGIN }, group);
+        ct->set_value<std::string>(group, { K_GLOBAL, K_BEGIN }, globalBegin);
+        std::string globalEnd = json_get<std::string>(curTmpl, { K_GLOBAL, K_END }, group);
+        ct->set_value<std::string>(group, { K_GLOBAL, K_END }, globalEnd);
+
+        std::string systemBegin = json_get<std::string>(curTmpl, { K_SYSTEM, K_BEGIN }, group);
+        ct->set_value<std::string>(group, { K_SYSTEM, K_BEGIN }, systemBegin);
+        std::string systemPrefix = json_get<std::string>(curTmpl, { K_SYSTEM, K_PREFIX }, group);
+        ct->set_value<std::string>(group, { K_SYSTEM, K_PREFIX }, systemPrefix);
+        std::string systemSuffix = json_get<std::string>(curTmpl, { K_SYSTEM, K_SUFFIX }, group);
+        ct->set_value<std::string>(group, { K_SYSTEM, K_SUFFIX }, systemSuffix);
+        std::string systemEnd = json_get<std::string>(curTmpl, { K_SYSTEM, K_END }, group);
+        ct->set_value<std::string>(group, { K_SYSTEM, K_END }, systemEnd);
+
+        std::string userBegin = json_get<std::string>(curTmpl, { K_USER, K_BEGIN }, group);
+        ct->set_value<std::string>(group, { K_USER, K_BEGIN }, userBegin);
+        std::string userPrefix = json_get<std::string>(curTmpl, { K_USER, K_PREFIX }, group);
+        ct->set_value<std::string>(group, { K_USER, K_PREFIX }, userPrefix);
+        std::string userSuffix = json_get<std::string>(curTmpl, { K_USER, K_SUFFIX }, group);
+        ct->set_value<std::string>(group, { K_USER, K_SUFFIX }, userSuffix);
+        std::string userEnd = json_get<std::string>(curTmpl, { K_USER, K_END }, group);
+        ct->set_value<std::string>(group, { K_USER, K_END }, userEnd);
+
+        std::string assistantBegin = json_get<std::string>(curTmpl, { K_ASSISTANT, K_BEGIN }, group);
+        ct->set_value<std::string>(group, { K_ASSISTANT, K_BEGIN }, assistantBegin);
+        std::string assistantPrefix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_PREFIX }, group);
+        ct->set_value<std::string>(group, { K_ASSISTANT, K_PREFIX }, assistantPrefix);
+        std::string assistantSuffix = json_get<std::string>(curTmpl, { K_ASSISTANT, K_SUFFIX }, group);
+        ct->set_value<std::string>(group, { K_ASSISTANT, K_SUFFIX }, assistantSuffix);
+        std::string assistantEnd = json_get<std::string>(curTmpl, { K_ASSISTANT, K_END }, group);
+        ct->set_value<std::string>(group, { K_ASSISTANT, K_END }, assistantEnd);
+
+        std::string reversePrompt = json_get<std::string>(curTmpl, { K_REVERSE_PROMPT }, group);
+        ct->set_value<std::string>(group, { K_REVERSE_PROMPT }, reversePrompt);
+
+        bool systemHasSuffix = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, group); // flags are read as bool, not string
+        ct->set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_SUFFIX }, systemHasSuffix);
+        bool systemHasEnd = json_get<bool>(curTmpl, { K_SYSTEMUSER_SYSTEM_HAS_END }, group);
+        ct->set_value(group, { K_SYSTEMUSER_SYSTEM_HAS_END }, systemHasEnd);
+
+        bool userHasBegin = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, group);
+        ct->set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_BEGIN }, userHasBegin);
+        bool userHasPrefix = json_get<bool>(curTmpl, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, group);
+        ct->set_value(group, { K_SYSTEMUSER_1ST_USER_HAS_PREFIX }, userHasPrefix);
+
+    }
+    LDBUG_LN("%s", ct->dump("", "DBUG:ChatONMetaLoad:ChatTemplates").c_str()); // debug dump of ct after the update
+    return true; // no false path exists in this function; failures surface as exceptions (e.g. from json_get)
+}
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 39fdcdc54..703502968 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -1,5 +1,5 @@
 #include "common.h"
-#include "chaton.hpp"
+#include "chaton_json.hpp"
 
 #include "console.h"
 #include "llama.h"
diff --git a/tests/test-chat-template-chaton.cpp b/tests/test-chat-template-chaton.cpp
index e14266ad5..7e352738c 100644
--- a/tests/test-chat-template-chaton.cpp
+++ b/tests/test-chat-template-chaton.cpp
@@ -7,7 +7,7 @@
 #include <cassert>
 
 #include "llama.h"
-#include "chaton.hpp"
+#include "chaton_json.hpp"
 
 
 std::vector<std::string> templateIds = { "llama2", "llama3", "chatml",