Apply @jploski's fix for missing tokens
Parent: 5aee498d97
Commit: 3e518e255b
2 changed files with 2 additions and 2 deletions
```diff
@@ -131,7 +131,7 @@ tokenizer = AutoTokenizer.from_pretrained(dir_model)
 reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}

 for i in range(vocab_size):
-    tokens.append(reverse_vocab[i])
+    tokens.append(reverse_vocab[i] if i in reverse_vocab else f"[PAD{i}]")
     scores.append(0.0)  # dummy
     toktypes.append(gguf.TokenType.NORMAL)
```
```diff
@@ -121,7 +121,7 @@ tokenizer = AutoTokenizer.from_pretrained(dir_model)
 reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}

 for i in range(vocab_size):
-    tokens.append(reverse_vocab[i])
+    tokens.append(reverse_vocab[i] if i in reverse_vocab else f"[PAD{i}]")
     scores.append(0.0)  # dummy
     toktypes.append(gguf.TokenType.NORMAL)
```
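Both hunks guard against gaps in the tokenizer vocabulary: a model's declared `vocab_size` can exceed the number of entries in `tokenizer.vocab`, so the `reverse_vocab` id-to-token map has missing ids and the old `reverse_vocab[i]` lookup raised a `KeyError`. Below is a minimal, self-contained sketch of the failure mode and the fix; the toy vocabulary and sizes are hypothetical stand-ins, not taken from the changed files.

```python
# Hypothetical stand-ins for tokenizer.vocab and the model's declared size.
vocab = {"hello": 0, "world": 1}
vocab_size = 4

# Same reverse mapping the conversion scripts build: token id -> token string.
reverse_vocab = {id: encoded_tok for encoded_tok, id in vocab.items()}

tokens = []
for i in range(vocab_size):
    # Old code: tokens.append(reverse_vocab[i])  -> KeyError for i = 2, 3.
    # Fixed: fall back to a deterministic [PAD<i>] placeholder for missing ids.
    tokens.append(reverse_vocab[i] if i in reverse_vocab else f"[PAD{i}]")

print(tokens)  # ['hello', 'world', '[PAD2]', '[PAD3]']
```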