From 8fe8231313b996bce1d6c5ae608b8a291cabbdff Mon Sep 17 00:00:00 2001 From: HanishKVC Date: Wed, 8 May 2024 19:36:39 +0530 Subject: [PATCH] ChatON:SubPartsAwareTokenizePath: Allow extract subparts testing --- common/chaton.hpp | 8 ++++++-- tests/test-chat-template-chaton.cpp | 3 ++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/common/chaton.hpp b/common/chaton.hpp index 686466cf9..c278246ee 100644 --- a/common/chaton.hpp +++ b/common/chaton.hpp @@ -635,6 +635,10 @@ inline std::vector<llama_token> chaton_llama_tokenize( bool add_special, bool parse_special) { LOGLN("DBUG:%s:%s:special[add:%d, parse:%d]", __func__, text.c_str(), add_special, parse_special); + if (model == nullptr) { + LOG_TEELN("ERRR:%s:Model NOT Provided:%s:special[add:%d, parse:%d]", __func__, text.c_str(), add_special, parse_special); + return std::vector<llama_token>{}; + } // upper limit for the number of tokens int n_tokens = text.length() + 2 * add_special; std::vector<llama_token> result(n_tokens); @@ -654,7 +658,7 @@ inline std::vector<llama_token> chaton_llama_tokenize( // If you want to parse special tokens in the taggedText, independent of what // partsTypes specifies, then set forceParseSpecial to true. inline std::vector<llama_token> chaton_llama_tokenize_ex( - const llama_context *ctx, + const struct llama_model *model, const std::string &taggedText, const std::string &partsTypes, const std::vector<int32_t> &partsLengths, @@ -671,7 +675,7 @@ inline std::vector<llama_token> chaton_llama_tokenize_ex( iStart += partLen; auto parseSpecial = partType == ChatParts::S ? 
true : false; parseSpecial |= forceParseSpecial; - auto curTokens = chaton_llama_tokenize(llama_get_model(ctx), msgPart, addSpecial, parseSpecial); + auto curTokens = chaton_llama_tokenize(model, msgPart, addSpecial, parseSpecial); tokens.insert(tokens.end(), curTokens.begin(), curTokens.end()); } return tokens; diff --git a/tests/test-chat-template-chaton.cpp b/tests/test-chat-template-chaton.cpp index 8c7a26489..ce65cc31e 100644 --- a/tests/test-chat-template-chaton.cpp +++ b/tests/test-chat-template-chaton.cpp @@ -173,6 +173,7 @@ int main(int argc, char **argv) { exit(1); } std::string metaJson(argv[1]); - check_chaton(metaJson); + //check_chaton(metaJson); + check_chaton_ex(metaJson); return 0; }