ChatON:ChatParts: Allow flexibility for more refined tokenization
This commit is contained in:
parent
6b23f15ffe
commit
92e780fb1a
1 changed files with 16 additions and 2 deletions
|
@ -85,7 +85,9 @@ json conMeta;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helps keep user prompt and chat-hs-template tag parts separate, but in sequence
|
* Helps keep user prompt and chat-hs-template tag parts separate, but in sequence.
|
||||||
|
* In turn, this gives the flexibility to tokenize with or without the parse_special flag for the different parts of the chat msg(s).
|
||||||
|
* One could use the triplet of str, get_types and get_partslens to achieve the above-mentioned flexibility.
|
||||||
*/
|
*/
|
||||||
class ChatParts {
|
class ChatParts {
|
||||||
|
|
||||||
|
@ -100,7 +102,7 @@ public:
|
||||||
// Identify no string condition and/or ignore string.
|
// Identify no string condition and/or ignore string.
|
||||||
static const auto X = '?';
|
static const auto X = '?';
|
||||||
|
|
||||||
ChatParts() :parts{}, types{""} {}
|
ChatParts() : parts{}, types{""} {}
|
||||||
|
|
||||||
char last_type() {
|
char last_type() {
|
||||||
if (types.length() == 0) {
|
if (types.length() == 0) {
|
||||||
|
@ -126,6 +128,18 @@ public:
|
||||||
return allin;
|
return allin;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns a copy of the `types` member string.
 * NOTE(review): presumably holds one type character per entry in `parts`
 * (last_type() returns a char and checks types.length()) - confirm against
 * the code that appends parts.
 */
std::string get_types() {
|
||||||
|
return types;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<int> get_partslens() {
|
||||||
|
std::vector<int> lens = {};
|
||||||
|
for(auto part: parts) {
|
||||||
|
lens.push_back(part.length());
|
||||||
|
}
|
||||||
|
return lens;
|
||||||
|
}
|
||||||
|
|
||||||
std::string name() {
|
std::string name() {
|
||||||
return typeid(*this).name();
|
return typeid(*this).name();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue