Revert the wstring tokenization; coherency was affected.

2023-06-24 12:58:49 +08:00 · 2023-06-24 12:58:49 +08:00 · 8342fe81b1
commit 8342fe81b1
parent 6da38b0d40
2 changed files with 9 additions and 9 deletions
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -225,7 +225,7 @@ maxhordectx = 1024
 maxhordelen = 256
 modelbusy = False
 defaultport = 5001
-KcppVersion = "1.32.2"
+KcppVersion = "1.32.3"
 showdebug = True

 class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
--- a/otherarch/mpt_v3.cpp
+++ b/otherarch/mpt_v3.cpp
@@ -87,14 +87,14 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
            word.assign(buf.data(), len);

            // Convert token from utf-8
-            std::wstring word_multibytes = convert_to_wstring(word);
-            if(word_multibytes!=L"")
-            {
-                word.resize(word_multibytes.size());
-                for (int w = 0; w < word_multibytes.size(); w++) {
-                    word[w] = uint8_t(word_multibytes[w]);
-                }
-            }
+            // std::wstring word_multibytes = convert_to_wstring(word);
+            // if(word_multibytes!=L"")
+            // {
+            //     word.resize(word_multibytes.size());
+            //     for (int w = 0; w < word_multibytes.size(); w++) {
+            //         word[w] = uint8_t(word_multibytes[w]);
+            //     }
+            // }

            vocab.token_to_id[word] = i;
            vocab.id_to_token[i] = word;