Revert the wstring tokenization; coherency was affected.

This commit is contained in:
Concedo 2023-06-24 12:58:49 +08:00
parent 6da38b0d40
commit 8342fe81b1
2 changed files with 9 additions and 9 deletions

View file

@@ -225,7 +225,7 @@ maxhordectx = 1024
maxhordelen = 256 maxhordelen = 256
modelbusy = False modelbusy = False
defaultport = 5001 defaultport = 5001
KcppVersion = "1.32.2" KcppVersion = "1.32.3"
showdebug = True showdebug = True
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler): class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):

View file

@@ -87,14 +87,14 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
word.assign(buf.data(), len); word.assign(buf.data(), len);
// Convert token from utf-8 // Convert token from utf-8
std::wstring word_multibytes = convert_to_wstring(word); // std::wstring word_multibytes = convert_to_wstring(word);
if(word_multibytes!=L"") // if(word_multibytes!=L"")
{ // {
word.resize(word_multibytes.size()); // word.resize(word_multibytes.size());
for (int w = 0; w < word_multibytes.size(); w++) { // for (int w = 0; w < word_multibytes.size(); w++) {
word[w] = uint8_t(word_multibytes[w]); // word[w] = uint8_t(word_multibytes[w]);
} // }
} // }
vocab.token_to_id[word] = i; vocab.token_to_id[word] = i;
vocab.id_to_token[i] = word; vocab.id_to_token[i] = word;