ChatON:ChatParts: Allow flexibility for more refined tokenization

2024-04-24 20:23:44 +05:30 · 2024-04-24 20:23:44 +05:30 · 92e780fb1a
commit 92e780fb1a
parent 6b23f15ffe
1 changed files with 16 additions and 2 deletions
--- a/common/chaton.hpp
+++ b/common/chaton.hpp
@ -85,7 +85,9 @@ json conMeta;
 /**
- * Helps keep user prompt and chat-hs-template tag parts seperate, but in sequence
+ * Helps keep user prompt and chat-hs-template tag parts separate, but in sequence.
 * In turn, this gives the flexibility to tokenize with or without the parse_special flag, wrt the different parts of the chat msg(s).
 * One could use the triplet of str, get_types and get_partslens to achieve the above-mentioned flexibility.
 */
 class ChatParts {
@ -100,7 +102,7 @@ public:
    // Identify no string condition and or ignore string.
    static const auto X = '?';
-    ChatParts() :parts{}, types{""} {}
+    ChatParts() : parts{}, types{""} {}
    char last_type() {
        if (types.length() == 0) {
@ -126,6 +128,18 @@ public:
        return allin;
    }
    /**
     * Return a copy of the `types` member.
     * The class comment lists this as one of the triplet (str, get_types,
     * get_partslens) meant to be used together.
     */
    std::string get_types() {
        std::string typesCopy = types;
        return typesCopy;
    }
    /**
     * Return the length of every entry in `parts`, in iteration order.
     * Each value is what the entry's length() reports, converted to int.
     */
    std::vector<int> get_partslens() {
        std::vector<int> lens;
        // Bind by const reference: iterating by value would copy each part
        // just to read its length.
        for (const auto &part : parts) {
            // length() yields an unsigned size type; make the narrowing explicit.
            lens.push_back(static_cast<int>(part.length()));
        }
        return lens;
    }
    /**
     * Return the type name that typeid reports for this object, as a std::string.
     * The exact text is whatever std::type_info::name() yields on the toolchain in use.
     */
    std::string name() {
        const char *typeName = typeid(*this).name();
        return std::string(typeName);
    }