bugfix: centos 7, gcc (GCC) 11.2.1 20220127 (Red Hat 11.2.1-9)
std::string messes up the vocab.
This commit is contained in:
parent
d7def1a752
commit
1d7e32bba7
1 changed file with 4 additions and 4 deletions
8
main.cpp
8
main.cpp
|
@ -144,15 +144,15 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
|
||||||
// load vocab
|
// load vocab
|
||||||
{
|
{
|
||||||
std::string word;
|
std::string word;
|
||||||
|
uint32_t len;
|
||||||
for (int i = 0; i < model.hparams.n_vocab; i++) {
|
for (int i = 0; i < model.hparams.n_vocab; i++) {
|
||||||
uint32_t len;
|
|
||||||
fin.read((char *) &len, sizeof(len));
|
fin.read((char *) &len, sizeof(len));
|
||||||
|
|
||||||
word.resize(len);
|
word.resize(len);
|
||||||
fin.read((char *) word.data(), len);
|
fin.read((char *) word.data(), len);
|
||||||
|
|
||||||
vocab.token_to_id[word] = i;
|
// Force construction of a new std::string; some compilers may share the internal data between copies.
|
||||||
vocab.id_to_token[i] = word;
|
vocab.token_to_id[std::string(word.data(), len)] = i;
|
||||||
|
vocab.id_to_token[i] = std::string(word.data(), len);
|
||||||
|
|
||||||
//if (i < 30000) {
|
//if (i < 30000) {
|
||||||
// fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str());
|
// fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str());
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue