Revert the wstring tokenization; coherency was affected.
This commit is contained in:
parent
6da38b0d40
commit
8342fe81b1
2 changed files with 9 additions and 9 deletions
|
@@ -225,7 +225,7 @@ maxhordectx = 1024
|
||||||
maxhordelen = 256
|
maxhordelen = 256
|
||||||
modelbusy = False
|
modelbusy = False
|
||||||
defaultport = 5001
|
defaultport = 5001
|
||||||
KcppVersion = "1.32.2"
|
KcppVersion = "1.32.3"
|
||||||
showdebug = True
|
showdebug = True
|
||||||
|
|
||||||
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
|
|
|
@@ -87,14 +87,14 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
|
||||||
word.assign(buf.data(), len);
|
word.assign(buf.data(), len);
|
||||||
|
|
||||||
// Convert token from utf-8
|
// Convert token from utf-8
|
||||||
std::wstring word_multibytes = convert_to_wstring(word);
|
// std::wstring word_multibytes = convert_to_wstring(word);
|
||||||
if(word_multibytes!=L"")
|
// if(word_multibytes!=L"")
|
||||||
{
|
// {
|
||||||
word.resize(word_multibytes.size());
|
// word.resize(word_multibytes.size());
|
||||||
for (int w = 0; w < word_multibytes.size(); w++) {
|
// for (int w = 0; w < word_multibytes.size(); w++) {
|
||||||
word[w] = uint8_t(word_multibytes[w]);
|
// word[w] = uint8_t(word_multibytes[w]);
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
vocab.token_to_id[word] = i;
|
vocab.token_to_id[word] = i;
|
||||||
vocab.id_to_token[i] = word;
|
vocab.id_to_token[i] = word;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue